DeepMoA: method to predict the mechanism of action of cancer drugs

Select data and import libraries

import sys # we require code from other folders
import pandas as pd
import numpy as np
import itertools
import pickle
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'
import seaborn as sns
import matplotlib.pyplot as plt
# Colour palette used by the plots in this notebook (histogram() reads entry 2).
CB_color_cycle = ['#EECC16', '#62BB35', '#FDAE33','#208EA3', '#EA4E9D', '#984ea3','#999999', '#e41a1c', '#dede00']
#sns.set_style("darkgrid")
import matplotlib.font_manager as fm
# Paths of the font files found on this machine.
font_files = fm.findSystemFonts()

# Reset rcParams to Matplotlib's defaults before applying the font settings below.
plt.rcdefaults()
# Go through and add each to Matplotlib's font cache.
for font_file in font_files:
    fm.fontManager.addfont(font_file)
# NOTE(review): the same rc call is issued twice; the second one is redundant.
plt.rc('font', family='Roboto')
plt.rc('font', family='Roboto')

# These two assignments override the family set just above: the family becomes
# 'sans-serif' and Roboto is registered as the sans-serif font.
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = 'Roboto'
#%config InlineBackend.figure_format='retina'
# pytorch relates imports
import torch
import torch.nn as nn
import torch.optim as optim

# imports from captum library
from captum.attr import LayerDeepLift
# for combobox
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
# Pandas display options: show up to 20000 rows and up to 200 characters per column.
# NOTE(review): min_rows is set twice (here and via set_option below) with the same value.
pd.options.display.min_rows = 20000
pd.set_option('max_colwidth', 200)
pd.options.display.max_rows = 20000
pd.set_option('min_rows', 20000)
# Machine-specific root folders; `computer` selects which one is used by every path below.
mac = "/Users/katyna/Library/CloudStorage/OneDrive-Tecnun/"
windows = "C:/Users/ksada/OneDrive - Tecnun/"
computer = windows # CHANGE
# Make the SparseGO code folder importable (the `util` module is imported from it).
sys.path.append(computer + "SparseGO_code/code")
import util
from util import *
%matplotlib inline
#%matplotlib inline

# To make histograms
def histogram(dataframe, color, title, ylabel,n_bins):
    """
    Draw a histogram of p-values on the current Matplotlib figure.

    Bars whose left bin edge is below 0.05 are recoloured with
    CB_color_cycle[2]; every other bar keeps `color`. The x label is always
    "P-value" and the top/right spines of subplot(111) are hidden.

        Parameters
        ----------
        dataframe: values passed straight to plt.hist
        color: base colour of the bars
        title: plot title
        ylabel: label of the y axis
        n_bins: `bins` argument of plt.hist
    """
    _counts, bin_edges, bars = plt.hist(dataframe, bins=n_bins, color=color, linewidth=0.1)

    # The last edge only closes the final bin, so it has no bar of its own.
    for idx, left_edge in enumerate(bin_edges[:-1]):
        if left_edge < 0.05:
            bars[idx].set_facecolor(CB_color_cycle[2])

    label_size = 16
    tick_size = 14
    plt.xlabel("P-value", fontsize=label_size)
    plt.ylabel(ylabel, fontsize=label_size)
    plt.title(title, fontsize=label_size)
    plt.xticks(fontsize=tick_size)
    plt.yticks(fontsize=tick_size)

    axes = plt.subplot(111)
    for side in ("top", "right"):
        axes.spines[side].set_visible(False)
# Input/output locations. Lines marked CHANGE are the ones to edit per experiment.
inputdir = computer+"SparseGO_code/data/cross_validation_expression/allsamples/" # CHANGE
dir1=computer+"Tesis/Codigo/VariableImportance/"
dir2=computer+"SparseGO_code/results/weights&biases/Expression_MSE_all/" # CHANGE
resultsdir=dir2
# Index-mapping files and drug fingerprints, all read from inputdir.
gene2id = inputdir+"gene2ind.txt"
cell2id=inputdir+"cell2ind.txt"
drug2id=inputdir+"drug2ind.txt"
drug2fingerprint=inputdir+"drug2fingerprint.txt"
# Saved model file to load from the results folder.
load=resultsdir+"last_model.pt"

onto = inputdir+"ontology.txt"  # CHANGE 
genotype=inputdir+"cell2expression.txt"  # CHANGE 

# Number of neurons used per GO term when naming neurons (term_1 ... term_N) further down.
# NOTE(review): presumably must match the value the loaded model was trained with - confirm.
num_neurons_per_GO = 6 # CHANGE

DeepLIFT

# Build the ontology structures with the helpers star-imported from `util`
# (load_mapping, load_ontology, sort_pairs, pairs_in_layers).
gene2id_mapping = load_mapping(gene2id)
dG, terms_pairs, genes_terms_pairs = load_ontology(onto, gene2id_mapping)
sorted_pairs, level_list, level_number = sort_pairs(genes_terms_pairs, terms_pairs, dG, gene2id_mapping)
layer_connections = pairs_in_layers(sorted_pairs, level_list, level_number) 

# Comma-separated feature matrices; rows are indexed below with the cell/drug mappings.
cell_features = np.genfromtxt(genotype, delimiter=',')
drug_features = np.genfromtxt(drug2fingerprint, delimiter=',')

drug2id_mapping = load_mapping(drug2id)
cell2id_mapping = load_mapping(cell2id)

num_genes = len(gene2id_mapping)
# Number of columns of the drug feature matrix.
drug_dim = len(drug_features[0,:])
There are 15015 genes
There are 1 roots: GO:0008150
There are 4184 terms
There are 1 connected components
# Load the saved model onto GPU 0 (map_location is the string 'cuda:0').
# NOTE(review): torch.load unpickles the file, so only load model files from a trusted source.
# NOTE(review): a CUDA device is required here and by the .cuda(0) calls further down.
model = torch.load(load, map_location='cuda:%d' % 0)
# Display the module structure as the cell output.
model
sparseGO_nn(
  (genes_terms_sparse_linear_1): SparseLinearNew(
    in_features=15015, out_features=25104, bias=True, sparsity=0.0030196221878822263, connectivity=tensor([[    0,     1,     2,  ..., 23721, 23722, 23723],
            [    0,     0,     0,  ..., 15014, 15014, 15014]], device='cuda:0'), small_world=False
  )
  (genes_terms_tanh): Tanh()
  (genes_terms_batchnorm): BatchNorm1d(25104, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (GO_terms_sparse_linear_1): SparseLinearNew(
    in_features=25104, out_features=8304, bias=True, sparsity=0.002372788160788691, connectivity=tensor([[  966,   967,   968,  ...,  7047,  7048,  7049],
            [    0,     0,     0,  ..., 25103, 25103, 25103]], device='cuda:0'), small_world=False
  )
  (GO_terms_tanh_1): Tanh()
  (GO_terms_batchnorm_1): BatchNorm1d(8304, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (GO_terms_sparse_linear_2): SparseLinearNew(
    in_features=8304, out_features=3684, bias=True, sparsity=0.003911619061964564, connectivity=tensor([[   0,    1,    2,  ..., 3681, 3682, 3683],
            [   0,    0,    0,  ..., 8303, 8303, 8303]], device='cuda:0'), small_world=False
  )
  (GO_terms_tanh_2): Tanh()
  (GO_terms_batchnorm_2): BatchNorm1d(3684, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (GO_terms_sparse_linear_3): SparseLinearNew(
    in_features=3684, out_features=1650, bias=True, sparsity=0.007924193070772875, connectivity=tensor([[ 150,  151,  152,  ..., 1641, 1642, 1643],
            [   0,    0,    0,  ..., 3683, 3683, 3683]], device='cuda:0'), small_world=False
  )
  (GO_terms_tanh_3): Tanh()
  (GO_terms_batchnorm_3): BatchNorm1d(1650, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (GO_terms_sparse_linear_4): SparseLinearNew(
    in_features=1650, out_features=726, bias=True, sparsity=0.015807663410969196, connectivity=tensor([[ 474,  475,  476,  ...,  711,  712,  713],
            [   0,    0,    0,  ..., 1649, 1649, 1649]], device='cuda:0'), small_world=False
  )
  (GO_terms_tanh_4): Tanh()
  (GO_terms_batchnorm_4): BatchNorm1d(726, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (GO_terms_sparse_linear_5): SparseLinearNew(
    in_features=726, out_features=318, bias=True, sparsity=0.03305785123966942, connectivity=tensor([[ 60,  61,  62,  ..., 105, 106, 107],
            [  0,   0,   0,  ..., 725, 725, 725]], device='cuda:0'), small_world=False
  )
  (GO_terms_tanh_5): Tanh()
  (GO_terms_batchnorm_5): BatchNorm1d(318, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (GO_terms_sparse_linear_6): SparseLinearNew(
    in_features=318, out_features=120, bias=True, sparsity=0.06981132075471698, connectivity=tensor([[  0,   1,   2,  ...,  93,  94,  95],
            [  0,   0,   0,  ..., 317, 317, 317]], device='cuda:0'), small_world=False
  )
  (GO_terms_tanh_6): Tanh()
  (GO_terms_batchnorm_6): BatchNorm1d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (GO_terms_sparse_linear_7): SparseLinearNew(
    in_features=120, out_features=42, bias=True, sparsity=0.2, connectivity=tensor([[ 18,  19,  20,  ...,  21,  22,  23],
            [  0,   0,   0,  ..., 119, 119, 119]], device='cuda:0'), small_world=False
  )
  (GO_terms_tanh_7): Tanh()
  (GO_terms_batchnorm_7): BatchNorm1d(42, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (GO_terms_sparse_linear_8): SparseLinearNew(
    in_features=42, out_features=30, bias=True, sparsity=1.0, connectivity=tensor([[ 0,  1,  2,  ..., 27, 28, 29],
            [ 0,  0,  0,  ..., 41, 41, 41]], device='cuda:0'), small_world=False
  )
  (GO_terms_tanh_8): Tanh()
  (GO_terms_batchnorm_8): BatchNorm1d(30, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (drug_linear_layer_1): Linear(in_features=2048, out_features=200, bias=True)
  (drug_tanh_1): Tanh()
  (drug_batchnorm_layer_1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (drug_linear_layer_2): Linear(in_features=200, out_features=100, bias=True)
  (drug_tanh_2): Tanh()
  (drug_batchnorm_layer_2): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (drug_linear_layer_3): Linear(in_features=100, out_features=50, bias=True)
  (drug_tanh_3): Tanh()
  (drug_batchnorm_layer_3): BatchNorm1d(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (final_linear_layer): Linear(in_features=80, out_features=40, bias=True)
  (final_tanh): Tanh()
  (final_batchnorm_layer): BatchNorm1d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (final_aux_linear_layer): Linear(in_features=40, out_features=1, bias=True)
  (final_aux_tanh): Tanh()
  (final_linear_layer_output): Linear(in_features=1, out_features=1, bias=True)
)
# Layers whose attributions are analyzed: the gene->term layer followed by
# the GO term layers 1 to 7, in that order (list position = layer number).
model_layers = [model.genes_terms_sparse_linear_1]
for go_layer_index in range(1, 8):
    model_layers.append(getattr(model, "GO_terms_sparse_linear_" + str(go_layer_index)))

GO terms info

# GO term names, read from an Excel file in the working directory.
# The code below looks terms up in the 'id' column and takes the name from the second column.
gene_ontology = pd.read_excel('all_go_terms_info.xlsx')

Get the GO terms of every layer, labelled with their neuron numbers

# Per-layer lookup tables, keyed by layer number:
#   all_terms_ids / all_terms_names: one entry per output neuron of the layer,
#       in neuron order ("<GO id>_<k>" and "<Name> (<k>)").
#   all_layers_non_virtual / all_layers_non_virtual_names: the same labels, but only for
#       the terms listed in level_list for that layer (virtual terms removed); those are
#       the attributions of interest.
all_terms_ids = {}
all_terms_names = {}
all_layers_non_virtual = {} # store only terms that are part of the layer (remove virtual), those are the important attributions
all_layers_non_virtual_names = {}
num_neurons_per_GO = 6 # keep in sync with the configuration value set at the top of the notebook

for layer_number in range(len(layer_connections)-1):
    layer_pairs = layer_connections[layer_number]

    terms_ids = []
    names = []
    output_id = create_index(layer_pairs[:,0]) # first num_neurons_per_GO neurons correspond to the term with key 0

    for term in output_id.keys():
        # Name is taken from the second column of the row whose 'id' equals the term.
        name = gene_ontology.loc[gene_ontology['id'] == term].to_numpy()[0,1].capitalize()
        # One label per neuron of the term (e.g. GO:0000038_1 ... GO:0000038_6), so the
        # vector can later be stacked next to the attributions.
        for i in range(1,num_neurons_per_GO+1):
            terms_ids.append(term+"_"+str(i))
            names.append(name+" ("+str(i)+")")
    all_terms_ids[layer_number] = np.array(terms_ids)
    all_terms_names[layer_number] = np.array(names)

    non_virtual = [] # store the terms part of that layer
    non_virtual_names = []
    for term in level_list[layer_number+1]:
        nv_name = gene_ontology.loc[gene_ontology['id'] == term].to_numpy()[0,1].capitalize()
        # Use the same neuron count as above (was a hard-coded range(1,7)), so both tables
        # stay consistent when num_neurons_per_GO changes.
        for i in range(1,num_neurons_per_GO+1):
            non_virtual.append(term+"_"+str(i))
            non_virtual_names.append(nv_name+" ("+str(i)+")")
    all_layers_non_virtual[layer_number] = non_virtual
    all_layers_non_virtual_names[layer_number] = non_virtual_names

All GO terms part of a layer (non-virtual) with their corresponding name and layer number…

# Long table with one row per neuron label of every non-virtual term: its id,
# its display name and the layer it belongs to.
real_go_info = pd.DataFrame({"GO_term":[],"Name":[],"layer_number":[]})
for layer_number in range(len(layer_connections)-1):
    layer_go_info = pd.DataFrame({"GO_term":all_layers_non_virtual[layer_number],"Name":all_layers_non_virtual_names[layer_number],"layer_number":(layer_number)})
    # concat is called without ignore_index, so each layer keeps its own 0..n-1 row labels
    # (the resulting index contains repeated labels).
    real_go_info = pd.concat((real_go_info,layer_go_info))
# NOTE(review): the displayed layer_number is a float (0.0), presumably because of the
# empty seed frame - confirm before relying on an integer dtype.
real_go_info.head()
GO_term Name layer_number
0 GO:0000019_1 Regulation of mitotic recombination (1) 0.0
1 GO:0000019_2 Regulation of mitotic recombination (2) 0.0
2 GO:0000019_3 Regulation of mitotic recombination (3) 0.0
3 GO:0000019_4 Regulation of mitotic recombination (4) 0.0
4 GO:0000019_5 Regulation of mitotic recombination (5) 0.0

Drugs info

def get_compound_names(file_name):
    """
    Reads the second and third tab-separated fields of every line of a compound file.

        Parameters
        ----------
        file_name: str
            Path of a tab-separated text file with at least three fields per line.

        Output
        ------
        compounds: list
            One [second_field, third_field] pair per non-blank line, in file order.
            The header line (if any) is included; callers drop it themselves.

        Raises
        ------
        IndexError
            If a non-blank line has fewer than three tab-separated fields.
    """
    compounds = []

    with open(file_name, 'r') as fi:
        for line in fi:
            stripped = line.strip()
            if not stripped: # blank (e.g. trailing) lines carry no compound, skip them
                continue
            tokens = stripped.split('\t')
            compounds.append([tokens[1],tokens[2]])
    return compounds
# [SMILES, name] pair of every compound, read from the input folder.
drugs = get_compound_names(inputdir+"compound_names.txt")
# Drop the first entry, which is the header row of the file.
drugs.pop(0)
['SMILE', 'Name']

DeepLIFT for VNN

Reference activation… (baseline)

# Cell-line baseline: column-wise median over all rows of cell_features.
median_cell_features = np.median(cell_features,axis=0) # to use as a reference
# Drug baseline: despite the variable name this is not a median; it is the fingerprint
# stored in glucose_fingerprint.txt.
median_drug_features = np.genfromtxt(computer+"SparseGO_code/data/glucose_fingerprint.txt", delimiter=',')

Attribution function: sum

def get_layer_attribution(layer_number,input_data,baseline,selected_drug_data):
    """
    Computes the LayerDeepLift attributions of one layer and sums them over the samples.

        Parameters
        ----------
        layer_number: int
            Position of the layer to study in model_layers (also the key used in
            all_terms_ids and all_layers_non_virtual)
        input_data: inputs handed to LayerDeepLift.attribute
        baseline: reference input handed to LayerDeepLift.attribute
        selected_drug_data: sequence whose element 1 is used as the name of the value column

        Output
        ------
        attribution_data: pandas.DataFrame
            Columns "GO_term" and <selected_drug_data[1]>; only rows whose GO_term is
            listed in all_layers_non_virtual[layer_number] are kept
    """
    value_column = selected_drug_data[1]

    deep_lift = LayerDeepLift(model, model_layers[layer_number], multiply_by_inputs=True)
    per_sample_attr = deep_lift.attribute(input_data, baseline)
    # Add up the attributions along the first axis (one row per input sample).
    summed_attr = per_sample_attr.cpu().detach().numpy().sum(0)

    # Neuron labels next to their summed attribution.
    stacked = np.column_stack((all_terms_ids[layer_number], summed_attr))
    attribution_data = pd.DataFrame(stacked, columns=["GO_term", value_column])
    # Stacking with the labels produced text, so convert the values back to numbers.
    attribution_data[[value_column]] = attribution_data[[value_column]].apply(pd.to_numeric).round(10)

    # Keep only the non-virtual terms of this layer.
    is_non_virtual = attribution_data['GO_term'].isin(all_layers_non_virtual[layer_number])
    return attribution_data.loc[is_non_virtual]

DeepLIFT for all drugs

# Table of summed attributions: one row per non-virtual neuron label (all layers),
# one column per drug. Built column by column in the loop below.
attribution_data_all = pd.DataFrame()
# Obtain the top GO terms on all layers for each drug
for selected_drug_data in drugs:
    selected_drug =selected_drug_data[0] # DRUG smile
    selected_drug_features = []
    drug_specific_features=drug_features[drug2id_mapping[selected_drug]] # features of drug
    
    # One input row per cell line: its features followed by the drug features.
    # Rows of cell_features are taken by position 0..len(cell2id_mapping)-1.
    for i in range(len(cell2id_mapping)): # make all combinations of selected_drug and cell types 
        selected_drug_features.append(np.concatenate((cell_features[i], drug_specific_features), axis=None))
    selected_drug_features = torch.FloatTensor(np.array(selected_drug_features))

    # Data for deeplift...
    input_data = torch.autograd.Variable(selected_drug_features.cuda(0))
    
    #median_drug_features = drug_specific_features
    # baseline is the median of the expression data followed by the reference drug fingerprint
    # NOTE(review): the baseline does not depend on the drug, so it is rebuilt identically
    # on every iteration.
    baseline = torch.FloatTensor(np.concatenate((median_cell_features, median_drug_features), axis=None))
    baseline = torch.reshape(baseline, (1, baseline.size()[0]))
    baseline = torch.autograd.Variable(baseline.cuda(0))
    
    attribution_data_drug = list(map(get_layer_attribution,range(0,len(model_layers)),itertools.repeat(input_data, len(model_layers)),itertools.repeat(baseline, len(model_layers)),itertools.repeat(selected_drug_data, len(model_layers)))) # get the attribution for each layer (map is similar to apply)
    attribution_data_drug = pd.concat(attribution_data_drug) # concatenate attribution of all layers
    
    # Append this drug's value column (column 1; column 0 holds the GO_term labels).
    attribution_data_all = pd.concat([attribution_data_all,attribution_data_drug.iloc[:,1]], axis=1)
    
    print(selected_drug_data[1])
    
# The GO_term labels are taken from the table of the LAST drug processed in the loop;
# NOTE(review): this raises NameError if `drugs` is empty, and assumes every drug
# produced the labels in the same order.
attribution_data_all = pd.concat([attribution_data_drug.iloc[:,0],attribution_data_all], axis=1)
attribution_data_all = attribution_data_all.set_index("GO_term")
attribution_data_all.head()
BRD-K02251932-001-01-3 BRD-K25737009-001-01-2 Nintedanib bicalutamide N-[(2R,3S)-2-[[cyclopropylmethyl(methyl)amino]methyl]-5-[(2R)-1-hydroxypropan-2-yl]-3-methyl-6-oxo-3,4-dihydro-2H-1,5-benzoxazocin-8-yl]-1-methyl-4-imidazolesulfonamide PHA-665752 N-cyclopropyl-3-[3-[[cyclopropyl(oxo)methyl]amino]-1H-indazol-6-yl]benzamide Ki8751 IPA-3 FAWUGYGEBHAQBU-PPEXNQRJSA-N ... ML031 Semagacestat RITA CDK9 inhibitor Dasatinib BMS-536924;CC1=CC(=CC2=C1NC(=C3C(=CC=NC3=O)NC[C@H](C4=CC(=CC=C4)Cl)O)N2)N5CCOCC5 SCHEMBL13741284 Daporinad STF-31 Narciclasine
GO_term
GO:0000012_1 -0.006564 -0.005680 0.003188 -0.005863 -0.003410 -0.002951 0.001118 0.002033 0.000799 -0.007842 ... -0.007256 -0.004271 -0.013783 -0.006253 0.002864 0.009604 -0.008099 -0.001475 -0.003698 -0.009866
GO:0000012_2 0.010029 0.011514 0.009892 0.012072 0.005788 0.012909 0.002316 0.009362 -0.011816 0.000166 ... 0.008918 -0.002449 0.017704 0.006732 0.002447 0.006485 0.003888 -0.000569 0.001628 0.017132
GO:0000012_3 0.008466 0.006840 -0.000027 0.006379 0.003082 -0.006110 -0.008877 -0.000347 -0.013084 0.000150 ... -0.006096 0.011308 0.012216 0.000997 0.011521 0.013800 0.002843 0.016328 0.021640 0.003536
GO:0000012_4 0.013018 0.007276 0.010128 0.008622 0.004795 0.006706 0.000874 0.005514 -0.003347 -0.000010 ... -0.003682 0.006544 0.010806 0.003346 0.017556 0.023130 0.001105 0.009710 0.016940 0.014787
GO:0000012_5 -0.007076 -0.006129 -0.007634 -0.003785 -0.004151 -0.007947 -0.008430 -0.006039 -0.002722 0.002163 ... 0.001821 -0.002346 -0.007831 -0.009368 -0.011118 -0.003408 -0.001760 0.003593 -0.000109 -0.020831

5 rows × 684 columns

ChEMBL Drug Target Slim

from chembl_webresource_client.new_client import new_client

Import SparseGO drugs

# Get names
# NOTE(review): this redefines get_compound_names from the "Drugs info" section with a
# different return value (lower-cased names only instead of [SMILES, name] pairs).
def get_compound_names(file_name):
    """
    Reads the third tab-separated field of every line of a compound file, lower-cased.

        Parameters
        ----------
        file_name: str
            Path of a tab-separated text file with at least three fields per line.

        Output
        ------
        compounds: list
            Lower-cased third field of every non-blank line, in file order.
            The header line (if any) is included; callers drop it themselves.

        Raises
        ------
        IndexError
            If a non-blank line has fewer than three tab-separated fields.
    """
    compounds = []

    with open(file_name, 'r') as fi:
        for line in fi:
            stripped = line.strip()
            if not stripped: # blank (e.g. trailing) lines carry no compound, skip them
                continue
            tokens = stripped.split('\t')
            compounds.append(tokens[2].lower())

    return compounds
# Lower-cased compound names, read from the general data folder.
names = get_compound_names(computer+"SparseGO_code/data/compound_names.txt")
# Drop the first entry, which is the header row of the file.
names.pop(0)
'name'

ChEMBL IDs

Get chembl IDs of drugs if available (there are always 684 drugs, the compounds2ids object can be reused)

# Get all chembl IDs -- slow (one or more web queries per drug)
molecule = new_client.molecule

# Maps a drug name to its ChEMBL ID. For "a + b" combinations the value is a list
# [ID_a, ID_b] when both parts are found, otherwise the single ID that was found.
compounds2ids = {}
for i,drug in enumerate(names):
    
    if " + " in drug:
        # Combination: look up each part by preferred name (case-insensitive).
        drug_split = drug.split(" + ", 1)
        ID1 = list(molecule.filter(pref_name__iexact=drug_split[0]).only('molecule_chembl_id'))
        ID2 = list(molecule.filter(pref_name__iexact=drug_split[1]).only('molecule_chembl_id'))
        if len(ID1)>0 and len(ID2)>0:
            compounds2ids[drug]=[ID1[0]['molecule_chembl_id'],ID2[0]['molecule_chembl_id']]
        elif len(ID1)>0:
            compounds2ids[drug]=ID1[0]['molecule_chembl_id'] 
        elif len(ID2)>0:
            compounds2ids[drug]=ID2[0]['molecule_chembl_id'] 
        else:
            # Not found: print the name and its position so it can be handled manually.
            print(drug,i)

    else:
        # Single drug: try preferred name, then ChEMBL ID, then synonyms; the first
        # result of the first successful query is kept.
        ID = list(molecule.filter(pref_name__iexact=drug).only('molecule_chembl_id'))
        if len(ID)>0:
            ID = ID[0]['molecule_chembl_id']
            compounds2ids[drug]=ID
        else:
            # for drugs that have the chembl ID as the name!!
            ID = list(molecule.filter(chembl_id=drug).only('molecule_chembl_id')) 
            if len(ID)>0:
                ID = ID[0]['molecule_chembl_id']
                compounds2ids[drug]=ID
            else:
                # in case it is not found by pref_name
                ID = list(molecule.filter(molecule_synonyms__molecule_synonym__iexact=drug).only('molecule_chembl_id'))
                if len(ID)>0:
                    ID = ID[0]['molecule_chembl_id']
                    compounds2ids[drug]=ID
                else:
                    # Not found: print the name and its position so it can be handled manually.
                    print(drug,i)
    # 341 chembl IDs were found (october 31 2022)
# manually add 6 more (names that the queries above could not resolve)
compounds2ids["teniposide [usan]"]="CHEMBL452231"
compounds2ids["docetaxel (taxotere)"]="CHEMBL92"
compounds2ids["nan + navitoclax(1)"]="CHEMBL443684"
compounds2ids["nan + navitoclax(2)"]="CHEMBL443684"
compounds2ids["osi-027;coc1=cc=cc2=cc(=c3c4=c(n=cnn4c(=n3)c5ccc(cc5)c(=o)o)n)n=c21"]="CHEMBL3120215"
compounds2ids["paclitaxel;cc1=c2[c@h](c(=o)[c@@]3([c@h](c[c@@h]4[c@]([c@h]3[c@@h]([c@@](c2(c)c)(c[c@@h]1oc(=o)[c@@h]([c@h](c5=cc=cc=c5)nc(=o)c6=cc=cc=c6)o)o)oc(=o)c7=cc=cc=c7)(co4)oc(=o)c)o)c)oc(=o)c"]="CHEMBL428647"
len(compounds2ids)
347

ChEMBL MoA (targets)

Get the molecule targets of each drug (if available)

# Maps every drug that has a ChEMBL ID to the set of its target ChEMBL IDs.
compounds2targets = dict() # required to store the drug targets 
for drug in compounds2ids.keys():
    compounds2targets[drug] = set()

# NOTE(review): chembl_ids is not used in the visible part of the notebook.
chembl_ids = list(compounds2ids.values()) # Chembl IDs of drugs

for drug in compounds2ids:
    # we jump from compounds to targets through the mechanism-of-action records
    # (one web query per drug; the value passed may be a single ID or a list of two IDs):
    activities = new_client.mechanism.filter(parent_molecule_chembl_id__in=compounds2ids[drug]).only(
        ['parent_molecule_chembl_id', 'target_chembl_id'])
    # extracting target ChEMBL IDs from the mechanism records:
    for act in activities:
        compounds2targets[drug].add(act['target_chembl_id'])
    print(drug)
# We now know all targets for some drugs.
# Keep only drugs whose target set contains at least one entry that is not None.
compounds2targets = {k: v for k, v in compounds2targets.items() if len(v) != 0 and len([x for x in list(v) if x is not None]) != 0 }
# Number of drugs with annotated targets (the count depends on the ChEMBL data at query time)
len(compounds2targets)
220

Drug slim GO terms

Get the GO terms of each target

# For every compound, gather the cross-references of all components of all its targets.
# Each stored row is one cross-reference followed by the ChEMBL ID of the target it came from.
compounds_GOterms = {}
for compound, compound_targets in compounds2targets.items():
    xref_rows = []

    for target in compound_targets:
        target_record = list(new_client.target.filter(target_chembl_id=target).only(['target_components']).only(['target_components_xrefs']))[0]
        # Not all targets have annotated components; an empty list simply adds nothing.
        for component in target_record['target_components']:
            xrefs = pd.DataFrame(component['target_component_xrefs'])
            # Column holding the target ID, repeated once per cross-reference row.
            target_column = pd.Series([target]).repeat(len(xrefs)).reset_index().pop(0)
            xrefs = pd.concat([xrefs, target_column], axis=1)
            xref_rows = xref_rows + xrefs.values.tolist()

    compounds_GOterms[compound] = pd.DataFrame(xref_rows).drop_duplicates()
    print(compound)
len(compounds_GOterms)
220
# Extend the ChEMBL annotations with the GO terms listed for each drug in the CTRPv2 sheet.
CTRPv2_terms = pd.read_excel('ctrp_goterms_drugs.xlsx')
for drug in CTRPv2_terms["Drug"].unique():
    # Drugs without any ChEMBL annotation start from an empty table.
    if drug not in compounds_GOterms:
        compounds_GOterms[drug] = pd.DataFrame()

    drug_rows = CTRPv2_terms.loc[CTRPv2_terms["Drug"] == drug]
    for term in drug_rows["Field"]:
        # Same four-column layout as the ChEMBL rows: term, empty, source "GoProcess", empty.
        new_row = pd.DataFrame([term, "", "GoProcess", ""]).transpose()
        compounds_GOterms[drug] = pd.concat([compounds_GOterms[drug], new_row])
    compounds_GOterms[drug] = compounds_GOterms[drug].drop_duplicates()

# Drop drugs left with no rows at all (some targets have no annotated GO terms).
compounds_GOterms = {name: table for name, table in compounds_GOterms.items() if len(table) != 0}
len(compounds_GOterms)
236

Match GO terms

Find all matching terms, i.e. the terms that are part of both the SparseGO graph and the drug slim results…

def load_ontology_extra_output(ontology_file, gene2id_mapping):
    """
    Creates the directed graph of the GO terms and stores the connected elements in arrays.

        Parameters
        ----------
        ontology_file: str
            Path of a whitespace-separated file with three fields per line. When the
            third field is 'default' the first two fields are connected as a term-term
            edge; otherwise the line is read as a (term, gene) annotation.

        gene2id_mapping: dict
            Maps each gene to keep to its index. Annotations of genes that are not keys
            of this mapping are printed and skipped.

        Output
        ------
        dG: networkx.classes.digraph.DiGraph
            Directed graph of all terms

        terms_pairs: numpy.ndarray
            Store the connection between a term and a term

        genes_terms_pairs: numpy.ndarray
            Store the connection between a gene and a term

        term_direct_gene_map: dict
            For each term, the set of indices of the genes annotated directly to it

        term_size_map: dict
            For each term, the number of genes annotated to it or to any of its descendants

        The function prints a summary and calls sys.exit(1) when a term has no genes,
        when there is more than one root or more than one connected component.
    """

    dG = nx.DiGraph() # Directed graph class

    terms_pairs = [] # store the pairs between a term and a term
    genes_terms_pairs = [] # store the pairs between a gene and a term

    gene_set = set() # create a set (elements can't repeat)
    term_direct_gene_map = {}
    term_size_map = {}

    # `with` guarantees the file is closed even if a malformed line raises below
    with open(ontology_file) as file_handle: # Open the file that has genes and go terms

        for line in file_handle:

            line = line.rstrip().split() # delete spaces and transform to list, line has 3 elements

            if not line: # blank line: nothing to read
                continue

            if line[2] == 'default': # third element is 'default': connect the two terms in the graph
                dG.add_edge(line[0], line[1]) # Add an edge between line[0] and line[1]
                terms_pairs.append([line[0], line[1]]) # Add the pair to the list
            else:
                if line[1] not in gene2id_mapping: # skip the gene if it is not part of gene2id_mapping
                    print(line[1])
                    continue

                genes_terms_pairs.append([line[0], line[1]]) # add the pair

                if line[0] not in term_direct_gene_map: # first time the term is seen: add it to the dictionary
                    term_direct_gene_map[ line[0] ] = set() # create a set

                term_direct_gene_map[line[0]].add(gene2id_mapping[line[1]]) # add the gene to the set of that term

                gene_set.add(line[1]) # add the gene to the set of all genes

    terms_pairs = np.array(terms_pairs) # convert to 2d array
    genes_terms_pairs = np.array(genes_terms_pairs) # convert to 2d array

    print('There are', len(gene_set), 'genes')

    for term in dG.nodes(): # done for every GO term

        term_gene_set = set() # create a set

        if term in term_direct_gene_map:
            term_gene_set = term_direct_gene_map[term] # genes connected directly to the term

        deslist = nxadag.descendants(dG, term) # all descendant GO terms (the term itself is not included)

        for child in deslist:
            if child in term_direct_gene_map: # add the genes of its descendants
                # union builds a NEW set, so the per-term sets in term_direct_gene_map are not modified
                term_gene_set = term_gene_set | term_direct_gene_map[child]

        if len(term_gene_set) == 0:
            print('There is empty terms, please delete term:', term)
            sys.exit(1)
        else:
            term_size_map[term] = len(term_gene_set) # number of genes in that term (including its descendants)

    roots = [n for n in dG.nodes if dG.in_degree(n) == 0] # find the root(s): nodes without incoming edges

    uG = dG.to_undirected() # Returns an undirected representation of the digraph
    connected_subG_list = list(nxacc.connected_components(uG)) # one entry per connected component

    # Verify my graph makes sense...
    print('There are', len(roots), 'roots:', roots[0])
    print('There are', len(dG.nodes()), 'terms')
    print('There are', len(connected_subG_list), 'connected components')
    if len(roots) > 1:
        print('There are more than 1 root of ontology. Please use only one root.')
        sys.exit(1)
    if len(connected_subG_list) > 1:
        print( 'There are more than connected components. Please connect them.')
        sys.exit(1)

    return dG, terms_pairs, genes_terms_pairs, term_direct_gene_map, term_size_map

SparseGO graph

# Import SparseGO graph (to extract all nodes/terms)... 

# Load ontology: create the graph of connected GO terms
# (this rebinds dG, terms_pairs and genes_terms_pairs that were loaded earlier)
dG, terms_pairs, genes_terms_pairs, term_direct_gene_map, term_size_map = load_ontology_extra_output(onto, gene2id_mapping)
####
# All terms of the model graph except the root term GO:0008150
# (list.remove raises ValueError if that term is not a node of the graph).
sparseGO_terms = list(dG.nodes())
sparseGO_terms.remove("GO:0008150")
There are 15015 genes
There are 1 roots: GO:0008150
There are 4184 terms
There are 1 connected components

Full GO graph

# Import full graph (to find parents)...
import obonet
#import networkx as nx
# The ontology is read from this URL, so the cell needs network access and the result
# depends on the GO release published there at run time.
url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'
full_graph = obonet.read_obo(url)
full_graph = full_graph.reverse() # reverse the direction of every edge
# After reversing, the nodes without incoming edges are the ontology roots, so the
# in-edges of a term come from its parents.
[n for n in full_graph.nodes if full_graph.in_degree(n) == 0] # graph contains the 3 roots (BP,MF,CC)
['GO:0003674', 'GO:0005575', 'GO:0008150']

Match terms!

Find all matching terms, i.e. the terms that are part of both the SparseGO graph and the drug slim results. If an ancestor of a slim term is part of the SparseGO graph, that ancestor is added as a match too.

# Each model has DIFFERENT matches (the graph is different)
# Every match is stored as [relationship, term], where relationship is
#   1 -> the slim term itself is a SparseGO term
#   2 -> an ancestor of the slim term (looked up in the full GO graph) is a SparseGO term
sparseGO_terms_set = set(sparseGO_terms) # constant-time membership tests
compounds_GOterms_matches = {}
for drug in compounds_GOterms.keys():
    # choose drug
    drug_df = compounds_GOterms[drug]
    drug_slim_GOterms = set(drug_df.loc[drug_df[2] == "GoProcess"][0]) # only GO processes
    drug_matches = [] # all directly matched terms and matched ancestors, first occurrence only
    seen_matches = set()

    for term in drug_slim_GOterms: # e.g. term = 'GO:1902669' is a good example
        candidates = []

        if term in sparseGO_terms_set: # is the term itself in the sparseGO terms?
            candidates.append((1, term))

        # Are its ascendants in the sparseGO terms?
        # Walk up the full graph one level at a time, starting with the direct parents.
        # `visited` guarantees that an ancestor shared by several paths is expanded once.
        visited = set()
        frontier = [term]
        while frontier:
            parents = {source for source, _ in full_graph.in_edges(frontier)} - visited
            visited |= parents
            candidates.extend((2, parent_term) for parent_term in sorted(parents)
                              if parent_term in sparseGO_terms_set)
            frontier = list(parents)

        for match in candidates: # remove duplicates, keep first-seen order
            if match not in seen_matches:
                seen_matches.add(match)
                drug_matches.append(list(match))

    compounds_GOterms_matches[drug] = drug_matches
    print(drug)
# delete drugs that have no matches
compounds_GOterms_matches = {i:j for i,j in compounds_GOterms_matches.items() if j != []}
len(compounds_GOterms_matches)
230

SparseGO terms x drugSlim terms matrix

# Drug names are the column labels; lower-case them so that they can be matched
# against the keys used for the drug annotations.
attribution_data_all.columns = attribution_data_all.columns.str.lower() # in order to match the term
attribution_data_all.head()
brd-k02251932-001-01-3 brd-k25737009-001-01-2 nintedanib bicalutamide n-[(2r,3s)-2-[[cyclopropylmethyl(methyl)amino]methyl]-5-[(2r)-1-hydroxypropan-2-yl]-3-methyl-6-oxo-3,4-dihydro-2h-1,5-benzoxazocin-8-yl]-1-methyl-4-imidazolesulfonamide pha-665752 n-cyclopropyl-3-[3-[[cyclopropyl(oxo)methyl]amino]-1h-indazol-6-yl]benzamide ki8751 ipa-3 fawugygebhaqbu-ppexnqrjsa-n ... ml031 semagacestat rita cdk9 inhibitor dasatinib bms-536924;cc1=cc(=cc2=c1nc(=c3c(=cc=nc3=o)nc[c@h](c4=cc(=cc=c4)cl)o)n2)n5ccocc5 schembl13741284 daporinad stf-31 narciclasine
GO_term
GO:0000012_1 -0.006564 -0.005680 0.003188 -0.005863 -0.003410 -0.002951 0.001118 0.002033 0.000799 -0.007842 ... -0.007256 -0.004271 -0.013783 -0.006253 0.002864 0.009604 -0.008099 -0.001475 -0.003698 -0.009866
GO:0000012_2 0.010029 0.011514 0.009892 0.012072 0.005788 0.012909 0.002316 0.009362 -0.011816 0.000166 ... 0.008918 -0.002449 0.017704 0.006732 0.002447 0.006485 0.003888 -0.000569 0.001628 0.017132
GO:0000012_3 0.008466 0.006840 -0.000027 0.006379 0.003082 -0.006110 -0.008877 -0.000347 -0.013084 0.000150 ... -0.006096 0.011308 0.012216 0.000997 0.011521 0.013800 0.002843 0.016328 0.021640 0.003536
GO:0000012_4 0.013018 0.007276 0.010128 0.008622 0.004795 0.006706 0.000874 0.005514 -0.003347 -0.000010 ... -0.003682 0.006544 0.010806 0.003346 0.017556 0.023130 0.001105 0.009710 0.016940 0.014787
GO:0000012_5 -0.007076 -0.006129 -0.007634 -0.003785 -0.004151 -0.007947 -0.008430 -0.006039 -0.002722 0.002163 ... 0.001821 -0.002346 -0.007831 -0.009368 -0.011118 -0.003408 -0.001760 0.003593 -0.000109 -0.020831

5 rows × 684 columns

attribution_data_all.shape # (rows = GO-term neurons, columns = drugs)
(25098, 684)

Only keep drugs that have annotated GO terms

# Restrict the attribution matrix to the drugs that kept at least one GO-term match
annotated_drugs = list(compounds_GOterms_matches.keys())
attribution_data_annotated = attribution_data_all[annotated_drugs]
attribution_data_annotated.shape # 230 DRUGS
(25098, 230)

Build drugSlim (MoA) matrix

# Drug-slim (MoA) label matrix with the same rows (GO-term neurons) and columns (drugs)
# as the attribution matrix: 1 where the GO term is annotated to the drug, 0 elsewhere.
# It is created directly as a zero matrix and filled with a single .loc assignment per
# drug, so no write ever goes through a temporary copy (chained indexing).
slim_matrix = pd.DataFrame(0.0, index=attribution_data_annotated.index, columns=attribution_data_annotated.columns)
for drug, drug_matches in compounds_GOterms_matches.items():
    # every match is [relationship, term]; only the term is needed here
    matched_terms = {term for _, term in drug_matches}

    # each GO term is represented by six rows named <term>_1 ... <term>_6
    drug_matches_names_duplicated = [term+"_"+str(i) for term in matched_terms for i in range(1,7)]

    slim_matrix.loc[drug_matches_names_duplicated, drug] = 1 # add a 1 if term is annotated to drug

Build matrices to store logits, predictions and real values

# One row per SparseGO term, one column per annotated drug.
# logits_matrix later receives predicted probabilities, so it is created as float
# instead of relying on an implicit int -> float upcast at assignment time.
logits_matrix = pd.DataFrame(0.0, index=sparseGO_terms, columns=slim_matrix.columns)
# Predicted labels and true labels stay integer (0/1)
preds_matrix = pd.DataFrame(0, index=sparseGO_terms, columns=slim_matrix.columns)
slim_matrix_single_neuron = pd.DataFrame(0, index=sparseGO_terms, columns=slim_matrix.columns)

Create models

Regression models…

# 4-fold cross-validated logistic regression, one model per GO term.
# Features: the six neuron attributions of the term; label: is the term annotated to the drug.
from sklearn.model_selection import train_test_split # this cell previously stopped with a NameError on this name

# Dictionaries to store results
GO_terms_auc_log = {}
GO_terms_aupr_log = {}
GO_terms_precision_log = {}

# Perform logistic
for goterm in sparseGO_terms:
    # if (real_go_info[real_go_info["GO_term"]==goterm+"_1"]["layer_number"]).values >3:
    #     continue

    # labels of the term for every annotated drug (all six neuron rows carry the same labels)
    goterm_drugs = slim_matrix.loc[[goterm+"_"+str(1)]].values.flatten()

    # skip terms with too few annotated drugs
    # NOTE(review): the final models further down use a threshold of 10 instead of 8 - confirm
    if sum(goterm_drugs) <= 8:
        continue

    list_nodes = [goterm+"_"+str(i) for i in range(1,7)]

    score = attribution_data_annotated.loc[list_nodes].T
    # scale every neuron by its standard deviation over all annotated drugs
    # (original author note: this has a big effect on the results)
    score_mod = score.divide(score.std()).fillna(0)

    # Separate drugs in 4 groups for cross-validation -----

    # Split data in 2 groups (with train_test_split in order to have 0s in both groups)
    X_part1,X_part2,y_part1,y_part2=train_test_split(score_mod,goterm_drugs,test_size=0.50,random_state=0,stratify=goterm_drugs)
    # Split data again in 4 groups (split data previously split)
    X_group1,X_group2,y_group1,y_group2=train_test_split(X_part1,y_part1,test_size=0.50,random_state=0,stratify=y_part1)
    X_group3,X_group4,y_group3,y_group4=train_test_split(X_part2,y_part2,test_size=0.50,random_state=0,stratify=y_part2)

    X_folds = [X_group1, X_group2, X_group3, X_group4]
    y_folds = [y_group1, y_group2, y_group3, y_group4]

    # store results of each cross validation
    all_y_test = []
    all_y_pred_proba = []
    all_y_pred = []
    all_y_names = []

    for test_fold in range(len(X_folds)):
        X_test = X_folds[test_fold]
        y_test = y_folds[test_fold]

        # Use the other 3 groups for training
        X_train = pd.concat([X_fold for k, X_fold in enumerate(X_folds) if k != test_fold])
        y_train = np.concatenate([y_fold for k, y_fold in enumerate(y_folds) if k != test_fold])

        # C=10e-2 is 0.1, i.e. stronger regularization than the default C=1.0
        logreg = LogisticRegression(penalty="l2",solver="liblinear",max_iter=2000, C=10e-2,class_weight="balanced")

        # fit the model with data
        logreg.fit(X_train,y_train)
        y_pred=logreg.predict(X_test)
        y_pred_proba = logreg.predict_proba(X_test)[::,1] # probability of class 1 for this fold
        #y_pred_proba = logreg.decision_function(X_test) # signed distance of sample from hyperplane of your model.

        all_y_test.append(y_test)
        all_y_pred_proba.append(y_pred_proba)
        all_y_pred.append(y_pred)
        all_y_names.append(X_test.index)

    all_y_test = np.concatenate(all_y_test)
    all_y_pred_proba = np.concatenate(all_y_pred_proba)
    all_y_names = np.concatenate(all_y_names)
    all_y_pred = np.concatenate(all_y_pred)

    # every drug is in exactly one test fold, so each one gets one out-of-fold value
    logits_matrix.loc[goterm,all_y_names] = all_y_pred_proba
    slim_matrix_single_neuron.loc[goterm,all_y_names] = all_y_test
    preds_matrix.loc[goterm,all_y_names] = all_y_pred

    # fpr, tpr, _ = metrics.roc_curve(all_y_test,  all_y_pred_proba)
    # GO_terms_auc_log[goterm] = metrics.auc(fpr, tpr) # same as roc_auc_score
    GO_terms_auc_log[goterm] = metrics.roc_auc_score(all_y_test, all_y_pred_proba)

    precision, recall, thresholds = metrics.precision_recall_curve(all_y_test, all_y_pred_proba)
    GO_terms_aupr_log[goterm] = metrics.auc(recall, precision)
    GO_terms_precision_log[goterm] = metrics.precision_score(all_y_test, all_y_pred)
NameError: name 'train_test_split' is not defined
  • class_weight=“balanced”… mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as n_samples / (n_classes * np.bincount(y))

  • C , default=1.0… Inverse of regularization strength; must be a positive float. Like in support vector machines, smaller values specify stronger regularization.

  • solver{‘newton-cg’, ‘lbfgs’, ‘liblinear’, ‘sag’, ‘saga’}

  • Los resultados son iguales si uso predict_proba o decision_function, solo a la hora de interpretar predicted_proba si me da un porcentaje y decision_function una distancia a la recta, el AUC me sale exactamente igual en la regresion logistica

# Cross-validated AUC per GO term, indexed by term; terms without a value are dropped
auc_items = list(GO_terms_auc_log.items())
GO_terms_auc_log_df = pd.DataFrame(auc_items,columns = ['goterm','auc']).set_index("goterm").dropna()
GO_terms_auc_log_df.sort_values(by=["auc"], ascending=False)
print(f"There are {len(GO_terms_auc_log_df)} logistic regression models.")
# only keep goterms that have a model
modelled_terms = list(GO_terms_auc_log_df.index)
logits_matrix = logits_matrix.loc[modelled_terms,:]
slim_matrix_single_neuron = slim_matrix_single_neuron.loc[modelled_terms,:]
preds_matrix = preds_matrix.loc[modelled_terms,:]

AUC histogram

# Histogram of the cross-validated AUC of every GO-term model
sns.set(rc={'figure.figsize':(10,6)})
fig, ax = plt.subplots()

# The legend advertises "AUC>=0.7", so the reported percentage and the highlighted
# bins use exactly that threshold.
auc_threshold = 0.7
good_models = GO_terms_auc_log_df[GO_terms_auc_log_df["auc"]>=auc_threshold]
perc = str(round((100*len(good_models)/len(GO_terms_auc_log_df)),2))+"%"
# plot the "auc" column explicitly so extra columns in the table cannot become extra series
N, bins, patches = plt.hist(GO_terms_auc_log_df["auc"], color=CB_color_cycle[6],bins=50, linewidth=0.1)

# highlight the bins whose left edge is at or above the threshold
for left_edge, patch in zip(bins, patches):
    if left_edge>=auc_threshold:
        patch.set_facecolor(CB_color_cycle[2])

plt.yticks(fontsize=16)
plt.xticks(fontsize=16)

# First, hide every spine except the bottom one.
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_color('#DDDDDD')

# Second, remove the ticks as well.
ax.tick_params(bottom=False, left=True)

# Third, add a horizontal grid (but keep the vertical grid hidden).
# Color the lines a light gray as well.
ax.set_axisbelow(True)
ax.yaxis.grid(True, color='#EEEEEE')
ax.xaxis.grid(False)

plt.xlabel("AUC value", fontsize=20)
plt.ylabel("Number of GO term models", fontsize=20)
colors2 = {'GO term models with AUC>=0.7':CB_color_cycle[2]}
labels = list(colors2.keys())
handles = [plt.Rectangle((0,0),1,1, color=colors2[label]) for label in labels]
plt.legend(handles, labels,fontsize=20, loc="lower left", bbox_to_anchor=(0.35,-0.35))
plt.text(0.71, 8, str(perc), fontsize=20,color='#333333')
plt.title("Overall performance of the models using expression", fontsize=24)
# (original author note: works best with the plain sum of the attributions)
fig.tight_layout()
fig.savefig(resultsdir+'modelsAUClog.png', transparent=True)

AUC boxplot by parents

# Annotate every modelled GO term with its number of parents in the SparseGO graph
# and with its level (level_number minus one), then plot the AUC per level.
modelled_terms = GO_terms_auc_log_df.index
number_parents = {term: len([source for source, _ in dG.in_edges(term)]) for term in modelled_terms}
levels = {term: level_number[term]-1 for term in modelled_terms}
levels = pd.DataFrame.from_dict(levels, orient='index')
number_parents = pd.DataFrame.from_dict(number_parents, orient='index')

GO_terms_auc_log_df = pd.concat([GO_terms_auc_log_df, levels,number_parents], axis=1)
GO_terms_auc_log_df.columns = ["auc","levels","parents"]
sns.set(rc={'figure.figsize':(10,6)})
fig, ax = plt.subplots()
ax = sns.boxplot(x="levels", y="auc", data=GO_terms_auc_log_df)

plt.yticks(fontsize=16)
plt.xticks(fontsize=16)

# Hide every spine except the bottom one, which is drawn in light gray
for side in ('top', 'right', 'left'):
    ax.spines[side].set_visible(False)
ax.spines['bottom'].set_color('#DDDDDD')

# No tick marks on the x axis
ax.tick_params(bottom=False, left=True)

# Light-gray horizontal grid drawn behind the boxes, no vertical grid
ax.set_axisbelow(True)
ax.yaxis.grid(True, color='#EEEEEE')
ax.xaxis.grid(False)

plt.xlabel("Level number", fontsize=20)
plt.ylabel("AUC value", fontsize=20)
plt.title("AUC value per level of GO hierarchy", fontsize=24)
# (original author note: works best with the plain sum of the attributions)
fig.tight_layout()
fig.savefig(resultsdir+'AUCbyLevelslog.png', transparent=True)
GO_terms_auc_log_df.head()

import pyreadr pyreadr.write_rdata(“C:/Users/ksada/OneDrive - Tecnun/SparseGO_Rdata/cv_allsamples_mutations/”+“GO_terms_auc_logarithm.RData”, GO_terms_auc_log_df.reset_index(), df_name=“GO_terms_auc_logarithm”)

TOP 15 PREDICTED GO TERMS

# The 15 GO terms with the highest cross-validated AUC
auc_descending = GO_terms_auc_log_df.sort_values(by=["auc"], ascending=False)
top15goterms = np.array(auc_descending.head(15).index)

Get Top GO term names

# Look up the description of the best 15 terms through their first neuron (<term>_1)
top15goterms_1 = [goterm+"_"+str(1) for goterm in top15goterms]
# .copy() gives an independent frame, so the column below is not written through a
# filtered slice of real_go_info
real_go_info_mod_best = real_go_info[real_go_info.GO_term.isin(top15goterms_1)].copy()
# drop the "_1" neuron suffix (literal match, not a regular expression)
real_go_info_mod_best["GO_term"] = real_go_info_mod_best["GO_term"].str.replace("_1","",regex=False)
top15goterms_auc = GO_terms_auc_log_df.sort_values(by=["auc"], ascending=False)[0:15].reset_index()
top15goterms_auc.columns=["GO_term","auc","levels","parents"]
top15goterms_auc.merge(real_go_info_mod_best[real_go_info_mod_best["GO_term"].isin(top15goterms)], on="GO_term")

WORST 15 PREDICTED GO TERMS

# The 15 GO terms with the lowest cross-validated AUC
auc_ascending = GO_terms_auc_log_df.sort_values(by=["auc"], ascending=True)
worst15goterms = np.array(auc_ascending.head(15).index)

Get Worst GO term names

# Look up the description of the worst 15 terms through their first neuron (<term>_1)
worst15goterms_1 = [goterm+"_"+str(1) for goterm in worst15goterms]
# .copy() gives an independent frame, so the column below is not written through a
# filtered slice of real_go_info
real_go_info_mod_worst = real_go_info[real_go_info.GO_term.isin(worst15goterms_1)].copy()
# drop the "_1" neuron suffix (literal match, not a regular expression)
real_go_info_mod_worst["GO_term"] = real_go_info_mod_worst["GO_term"].str.replace("_1","",regex=False)
worst15goterms_auc = GO_terms_auc_log_df.sort_values(by=["auc"], ascending=True)[0:15].reset_index()
worst15goterms_auc.columns=["GO_term","auc","levels","parents"]
worst15goterms_auc.merge(real_go_info_mod_worst[real_go_info_mod_worst["GO_term"].isin(worst15goterms)], on="GO_term")

AUPR histogram

# Cross-validated AUPR per GO term, indexed by term
aupr_items = list(GO_terms_aupr_log.items())
GO_terms_aupr_log_df = pd.DataFrame(aupr_items,columns = ['goterm','aupr']).set_index("goterm").dropna()
GO_terms_aupr_log_df.sort_values(by=["aupr"], ascending=False).head()
# (original author note: there is a problem with the recall)
sns.set(rc={'figure.figsize':(5,3)})
share_above = 100*len(GO_terms_aupr_log_df[GO_terms_aupr_log_df["aupr"]>0.69])/len(GO_terms_aupr_log_df)
perc = str(round(share_above,2))+"%"
N, bins, patches = plt.hist(GO_terms_aupr_log_df, color=CB_color_cycle[6],bins=50, linewidth=0.1)
# recolor the bins whose left edge is above 0.69
for left_edge, patch in zip(bins, patches):
    if left_edge>0.69:
        patch.set_facecolor(CB_color_cycle[3])

plt.xlabel("AUPR", fontsize=16)
plt.title(perc, fontsize=16)

Precision histogram

# Cross-validated precision per GO term, indexed by term
precision_items = list(GO_terms_precision_log.items())
GO_terms_precision_log_df = pd.DataFrame(precision_items,columns = ['goterm','precision']).set_index("goterm").dropna()
GO_terms_precision_log_df.sort_values(by=["precision"], ascending=False).head()
share_above = 100*len(GO_terms_precision_log_df[GO_terms_precision_log_df["precision"]>0.69])/len(GO_terms_precision_log_df)
perc = str(round(share_above,2))+"%"
N, bins, patches = plt.hist(GO_terms_precision_log_df, color=CB_color_cycle[6],bins=50, linewidth=0.1)

# recolor the bins whose left edge is above 0.69
for left_edge, patch in zip(bins, patches):
    if left_edge>0.69:
        patch.set_facecolor(CB_color_cycle[4])

plt.xlabel("Precision", fontsize=16)
plt.title(perc, fontsize=16)

Example prediction

def f2(goterm):
    """Return the selected GO term unchanged (callback for the combobox below)."""
    return goterm
# Combobox listing the modelled GO terms from highest to lowest AUC;
# the current selection is read from combobox_go.result
combobox_go = interactive(f2, goterm=widgets.Combobox(options=list(GO_terms_auc_log_df.sort_values(by=["auc"], ascending=False).index)))

Choose drug to study…

# Out-of-fold performance of the model of the GO term chosen in the combobox
display(combobox_go)
selected_go = combobox_go.result
#auc
fpr, tpr, _ = metrics.roc_curve(slim_matrix_single_neuron.loc[selected_go],  logits_matrix.loc[selected_go])
auc = metrics.roc_auc_score(slim_matrix_single_neuron.loc[selected_go],  logits_matrix.loc[selected_go])
plt.plot(fpr,tpr,label="data 1, auc="+str(auc))
plt.legend(loc=4)
plt.show()

# predicted probability of annotated (1) versus non-annotated (0) drugs
plot = pd.concat([pd.DataFrame(slim_matrix_single_neuron.loc[selected_go]),pd.DataFrame(logits_matrix.loc[selected_go])], axis=1)
plot.columns = ["slim","probability"]
ax = sns.boxplot(x="slim", y="probability", data=plot,showfliers=False )
metrics.ConfusionMatrixDisplay.from_predictions(slim_matrix_single_neuron.loc[selected_go], preds_matrix.loc[selected_go])
# `visible` is the current name of the former `b` argument; same call as in the drug example below
plt.grid(visible=None)
print("Accuracy:",metrics.accuracy_score(slim_matrix_single_neuron.loc[selected_go], preds_matrix.loc[selected_go]))
print("Precision:",metrics.precision_score(slim_matrix_single_neuron.loc[selected_go], preds_matrix.loc[selected_go]))
print("Recall:",metrics.recall_score(slim_matrix_single_neuron.loc[selected_go], preds_matrix.loc[selected_go])) #TP / (TP+FN)

TN - FP

FN - TP

# Precision-recall curve of the selected GO term; the legend shows the area under it
go_truth = slim_matrix_single_neuron.loc[selected_go]
go_probability = logits_matrix.loc[selected_go]
precision, recall, thresholds = metrics.precision_recall_curve(go_truth, go_probability)
auc_precision_recall = metrics.auc(recall, precision)
plt.plot(recall, precision, label=str(auc_precision_recall))
plt.legend(loc=4)
plt.show()

METRICS drugs

# Per-drug metrics: how well the GO-term probabilities of one drug rank its annotated terms
auc_drugs = {}
aupr_drugs = {}
for drug in list(slim_matrix_single_neuron.columns):
    drug_labels = slim_matrix_single_neuron.loc[:,drug]
    drug_scores = logits_matrix.loc[:,drug]
    # ROC AUC needs both classes: skip drugs without any annotated term among the
    # modelled terms and also drugs annotated to every modelled term
    if drug_labels.nunique() < 2:
        continue
    auc_drugs[drug] = metrics.roc_auc_score(drug_labels, drug_scores)
    precision, recall, thresholds = metrics.precision_recall_curve(drug_labels, drug_scores)
    aupr_drugs[drug] = metrics.auc(recall, precision)

# NOTE: the index column keeps the name "goterm" although it holds drug names
auc_drugs_df = pd.DataFrame(list(auc_drugs.items()),columns = ['goterm','auc']).set_index("goterm")
auc_drugs_df = auc_drugs_df.dropna()

aupr_drugs_df = pd.DataFrame(list(aupr_drugs.items()),columns = ['goterm','aupr']).set_index("goterm")
aupr_drugs_df = aupr_drugs_df.dropna()

AUC histogram drugs

# Histogram of the per-drug AUC
sns.set(rc={'figure.figsize':(10,6)})
fig, ax = plt.subplots()

# The legend advertises "AUC>=0.7", so the reported percentage and the highlighted
# bins use exactly that threshold.
auc_threshold = 0.7
good_drugs = auc_drugs_df[auc_drugs_df["auc"]>=auc_threshold]
perc = str(round((100*len(good_drugs)/len(auc_drugs_df)),2))+"%"
N, bins, patches = plt.hist(auc_drugs_df["auc"], color=CB_color_cycle[6],bins=50, linewidth=0.1)

# highlight the bins whose left edge is at or above the threshold
for left_edge, patch in zip(bins, patches):
    if left_edge>=auc_threshold:
        patch.set_facecolor(CB_color_cycle[5])

plt.yticks(fontsize=16)
plt.xticks(fontsize=16)

# First, hide every spine except the bottom one.
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_color('#DDDDDD')

# Second, remove the ticks as well.
ax.tick_params(bottom=False, left=True)

# Third, add a horizontal grid (but keep the vertical grid hidden).
# Color the lines a light gray as well.
ax.set_axisbelow(True)
ax.yaxis.grid(True, color='#EEEEEE')
ax.xaxis.grid(False)

plt.xlabel("AUC value", fontsize=20)
plt.ylabel("Number of drugs", fontsize=20)
colors2 = {'Drugs with AUC>=0.7':CB_color_cycle[5]}
labels = list(colors2.keys())
handles = [plt.Rectangle((0,0),1,1, color=colors2[label]) for label in labels]
plt.legend(handles, labels,fontsize=20, loc="lower left", bbox_to_anchor=(0.35,-0.35))
plt.text(0.79, 6, str(perc), fontsize=20,color='#333333')
plt.title("Overall performance by drugs using mutations", fontsize=24)
# (original author note: works best with the plain sum of the attributions)
fig.tight_layout()
fig.savefig(resultsdir+'drugsAUClog.png', transparent=True)
auc_drugs_df.sort_values(by=["auc"], ascending=False)

AUPR histogram drugs

# Histogram of the per-drug AUPR; the title shows the share of drugs above 0.69
sns.set(rc={'figure.figsize':(5,3)})
share_above = 100*len(aupr_drugs_df[aupr_drugs_df["aupr"]>0.69])/len(aupr_drugs_df)
perc = str(round(share_above,2))+"%"
N, bins, patches = plt.hist(aupr_drugs_df, color=CB_color_cycle[6],bins=50, linewidth=0.1)
# recolor the bins whose left edge is above 0.69
for left_edge, patch in zip(bins, patches):
    if left_edge>0.69:
        patch.set_facecolor(CB_color_cycle[3])

plt.xlabel("AUPR drugs", fontsize=16)
plt.title(perc, fontsize=16)
auc_drugs_df.sort_values(by=["auc"], ascending=False)

Example drug prediction

def f(drug):
    """Return the selected drug unchanged (callback for the drug comboboxes)."""
    return drug
# Combobox listing the drugs from highest to lowest AUC;
# the current selection is read from combobox.result
combobox = interactive(f, drug=widgets.Combobox(options=list(auc_drugs_df.sort_values(by=["auc"], ascending=False).index)))

Choose drug to study…

# Out-of-fold performance for the drug chosen in the combobox
display(combobox)
selected_drug_name = combobox.result
sns.set(rc={'figure.figsize':(4,2)})

drug_truth = slim_matrix_single_neuron.loc[:,selected_drug_name]
drug_probability = logits_matrix.loc[:,selected_drug_name]
drug_prediction = preds_matrix.loc[:,selected_drug_name]

# ROC curve
fpr, tpr, _ = metrics.roc_curve(drug_truth, drug_probability)
auc = metrics.roc_auc_score(drug_truth, drug_probability)
plt.plot(fpr,tpr,label="data 1, auc="+str(auc))
plt.legend(loc=4)
plt.show()

# predicted probability of annotated (1) versus non-annotated (0) GO terms
plot = pd.concat([drug_truth.to_frame(), drug_probability.to_frame()], axis=1)
plot.columns = ["slim","probability"]
ax = sns.boxplot(x="slim", y="probability", data=plot,showfliers=False )
sum(drug_truth)
# raw attribution (scaled by 1e4) of annotated versus non-annotated neurons
plot = pd.concat([slim_matrix.loc[:,selected_drug_name].to_frame(),(attribution_data_all.loc[:,selected_drug_name]*1e4).to_frame()], axis=1)
plot.columns = ["slim","attribution"]
ax = sns.boxplot(x="slim", y="attribution", data=plot,showfliers=True )
metrics.ConfusionMatrixDisplay.from_predictions(drug_truth, drug_prediction)
plt.grid(visible=None)
print("Accuracy:",metrics.accuracy_score(drug_truth, drug_prediction))
print("Precision:",metrics.precision_score(drug_truth, drug_prediction))
print("Recall:",metrics.recall_score(drug_truth, drug_prediction)) #TP / (TP+FN)
print("AUC with score:",auc)

View drug’s top functions…

# First neuron (<term>_1) of every modelled GO term, used to look up the term information
predictions_nodes = [goterm+"_"+str(1) for goterm in list(logits_matrix.index)]
# add names to go terms
# .copy() gives an independent frame, so the column below is not written through a
# filtered slice of real_go_info
real_go_info_log = real_go_info[real_go_info.GO_term.isin(predictions_nodes)].copy()
# drop the "_1" neuron suffix (literal match, not a regular expression)
real_go_info_log["GO_term"] = real_go_info_log["GO_term"].str.replace("_1","",regex=False)
# These are the out-of-fold (test) probabilities of the selected drug
test_drug_logs = pd.DataFrame(logits_matrix.loc[:,selected_drug_name]).reset_index()
test_drug_logs.columns  = ["GO_term","probability"]
test_drug_logs = test_drug_logs.merge(real_go_info_log, on="GO_term")
test_drug_logs.sort_values(by=["probability"], ascending=False)
sns.set(rc={'figure.figsize':(15,8)})
ax = sns.boxplot(x="layer_number", y="probability", data=test_drug_logs, order=[7,6,5,4,3,2,1,0],showfliers=False)
ax = ax.set(xlabel='General terms                                                    -                                                          Specific terms')

Final model

Once the models have been cross-validated we create the final models using all samples…

# Final models: one logistic regression per GO term, fitted and evaluated on all
# annotated drugs (the metrics below are therefore training-set metrics).
GO_terms_auc_log_final = {}
GO_terms_aupr_log_final = {}
GO_terms_precision_log_final = {} # NOTE: despite the name, this stores the recall
models_log = {}

# Perform logistics
for goterm in sparseGO_terms:
    #print(goterm)
    goterm_drugs = slim_matrix.loc[[goterm+"_"+str(1)]].values.flatten()

    # skip terms with too few annotated drugs
    if sum(goterm_drugs) <= 10:
        continue

    # the six neurons of the term are the features
    list_nodes = [goterm+"_"+str(i) for i in range(1,7)]

    score = attribution_data_annotated.loc[list_nodes].T
    score_mod = score.divide(score.std()).fillna(0)

    # train and test are the same
    X_train = X_test = score_mod
    y_train = y_test = goterm_drugs

    logreg = LogisticRegression(penalty="l2",solver="liblinear",max_iter=2000, C=10e-2,class_weight="balanced")
    # fit the model with data
    logreg.fit(X_train,y_train)
    y_pred = logreg.predict(X_test)
    y_pred_proba = logreg.predict_proba(X_test)[::,1]

    # metrics
    GO_terms_auc_log_final[goterm] = metrics.roc_auc_score(y_test, y_pred_proba)
    precision, recall, thresholds = metrics.precision_recall_curve(y_test, y_pred_proba)
    GO_terms_aupr_log_final[goterm] = metrics.auc(recall, precision)
    GO_terms_precision_log_final[goterm] = metrics.recall_score(y_test, y_pred)

    # keep the fitted model to predict drugs without annotation
    models_log[goterm]=logreg
len(models_log)

Final model AUC

# AUC of the final models, indexed by GO term
auc_final_items = list(GO_terms_auc_log_final.items())
GO_terms_auc_log_df_final = pd.DataFrame(auc_final_items,columns = ['goterm','auc']).set_index("goterm").dropna()
GO_terms_auc_log_df_final.sort_values(by=["auc"], ascending=False)
sns.set(rc={'figure.figsize':(6,4)})
share_above = 100*len(GO_terms_auc_log_df_final[GO_terms_auc_log_df_final["auc"]>0.7])/len(GO_terms_auc_log_df_final)
perc = str(round(share_above,2))+"%"
N, bins, patches = plt.hist(GO_terms_auc_log_df_final, color=CB_color_cycle[6],bins=50, linewidth=0.1)

# recolor the bins whose left edge is above 0.7
for left_edge, patch in zip(bins, patches):
    if left_edge>0.7:
        patch.set_facecolor(CB_color_cycle[2])

plt.xlabel("AUC (logistic 1)", fontsize=16)
plt.title(perc, fontsize=16)
# (original author note: works best with the plain sum of the attributions)

Final model AUPR

# AUPR of the final models, indexed by GO term
aupr_final_items = list(GO_terms_aupr_log_final.items())
GO_terms_aupr_log_df_final = pd.DataFrame(aupr_final_items,columns = ['goterm','aupr']).set_index("goterm").dropna()
GO_terms_aupr_log_df_final.sort_values(by=["aupr"], ascending=False).head()
# (original author note: there is a problem with the recall)
sns.set(rc={'figure.figsize':(5,3)})
share_above = 100*len(GO_terms_aupr_log_df_final[GO_terms_aupr_log_df_final["aupr"]>0.7])/len(GO_terms_aupr_log_df_final)
perc = str(round(share_above,2))+"%"
N, bins, patches = plt.hist(GO_terms_aupr_log_df_final, color=CB_color_cycle[6],bins=50, linewidth=0.1)
# recolor the bins whose left edge is above 0.7
for left_edge, patch in zip(bins, patches):
    if left_edge>0.7:
        patch.set_facecolor(CB_color_cycle[3])

plt.xlabel("AUPR", fontsize=16)
plt.title(perc, fontsize=16)

Final model Recall

# Recall of the final models (the column is called "precision" but holds recall values)
recall_final_items = list(GO_terms_precision_log_final.items())
GO_terms_precision_log_df_final = pd.DataFrame(recall_final_items,columns = ['goterm','precision']).set_index("goterm").dropna()
GO_terms_precision_log_df_final.sort_values(by=["precision"], ascending=False).head()
share_above = 100*len(GO_terms_precision_log_df_final[GO_terms_precision_log_df_final["precision"]>0.69])/len(GO_terms_precision_log_df_final)
perc = str(round(share_above,2))+"%"
N, bins, patches = plt.hist(GO_terms_precision_log_df_final, color=CB_color_cycle[6],bins=50, linewidth=0.1)

# recolor the bins whose left edge is above 0.69
for left_edge, patch in zip(bins, patches):
    if left_edge>0.69:
        patch.set_facecolor(CB_color_cycle[4])

plt.xlabel("Recall", fontsize=16)
plt.title(perc, fontsize=16)

Predict for a new drug

Make predictions

# Drugs without annotated GO terms. Sorted, so that the drug order (and therefore the
# column order of everything derived from it) is the same on every run.
unknown = sorted(set(attribution_data_all.columns)-set(attribution_data_annotated.columns))

Get the probabilities for all unknown drugs

# Predicted probability of every modelled GO term for every drug without annotation.
# Result: probabilities_unknown, rows = GO terms (order of models_log), columns = unknown drugs.
predictions = {}
probabilities = {}

if unknown:
    for goterm, model in models_log.items():
        # Extract the feature names from the model (those are the attributions we need)
        list_nodes = list(model.feature_names_in_)

        # all unknown drugs at once: rows = drugs, columns = neurons of this term
        score = attribution_data_all.loc[list_nodes, unknown].T
        # divide by std of each neuron, only use drugs that trained the models
        neuron_std = attribution_data_annotated.loc[list_nodes].T.std()
        score_mod = score.divide(neuron_std).fillna(0)

        #predictions[goterm] = model.predict(score_mod)

        # probability of class 1 for every unknown drug
        probabilities[goterm] = pd.Series(model.predict_proba(score_mod)[::,1], index=score_mod.index)

    print(len(unknown), "drugs predicted")

# one row per GO term, one column per drug (an empty frame if nothing was predicted)
probabilities_unknown = pd.DataFrame(probabilities).T
# Save file
with open(resultsdir+'probabilities_unknown_MG2_log_sum.pkl', 'wb') as dictionary_file:
  pickle.dump(probabilities_unknown, dictionary_file)

IMPORT file

# To import dataframe created before
# (this is the file written by the prediction cell above; only unpickle files you created yourself)
with open(resultsdir+'probabilities_unknown_MG2_log_sum.pkl', 'rb') as dictionary_file:
    probabilities_unknown = pickle.load(dictionary_file)  

import pyreadr pyreadr.write_rdata(“C:/Users/ksada/OneDrive - Tecnun/SparseGO_Rdata/cv_allsamples_mutations/”+“probabilities_known.RData”, probabilities_unknown.reset_index(), df_name=“probabilities_known”)

Study drug with unknown MOA

Choose drug with unknown MOA…

# Pick one drug without annotation and list its predicted GO-term probabilities
drug_picker = widgets.Combobox(options=unknown)
combobox_u = interactive(f, drug=drug_picker)
display(combobox_u)
selected_drug_u_name = combobox_u.result

selected_probabilities = probabilities_unknown.loc[:,selected_drug_u_name]
probabilities_df = pd.DataFrame.from_dict(selected_probabilities).reset_index()
probabilities_df.columns  = ["GO_term","probability"]
# add the GO-term information
probabilities_df = probabilities_df.merge(real_go_info_log, on="GO_term")
probabilities_df.sort_values(by=["probability"], ascending=False)

# distribution of the probabilities per layer
sns.set(rc={'figure.figsize':(15,8)})
ax = sns.boxplot(x="layer_number", y="probability", data=probabilities_df, order=[7,6,5,4,3,2,1,0],showfliers=False)
ax = ax.set(xlabel='General terms                                                    -                                                          Specific terms')
  • bendamustine –> buen ejemplo, parece que tiene sentido lo que sale, es un farmaco para la leucemia y salen cosas de la sangre https://pubchem.ncbi.nlm.nih.gov/compound/65628#section=Chemical-Vendors
  • temozolomide –> es para Glioblastoma y me sale brain de los más altos y algo de calcium que tiene algo que ver
  • Bleomycin –>

Look for the GO terms (neurons) with the highest variance between drugs…

# Attribution: the 100 neurons whose attribution varies the most between drugs
neuron_variance = attribution_data_all.var(axis=1)
topvariance = list(neuron_variance.sort_values(axis=0, ascending=False)[0:100].index.values)
data_array = attribution_data_all.loc[topvariance].T

Or cluster by the probability…

# Probabilities: the 200 GO terms whose predicted probability varies the most between drugs
# (replaces the topvariance / data_array built from the attributions)
term_variance = probabilities_unknown.var(axis=1)
topvariance = list(term_variance.sort_values(axis=0, ascending=False)[0:200].index.values)
data_array = probabilities_unknown.loc[topvariance].T
import plotly.graph_objects as go
import plotly.figure_factory as ff
from scipy.spatial.distance import pdist, squareform

# Clustered heatmap of data_array (rows = drugs, columns = GO terms):
# a dendrogram of the GO terms on top, a dendrogram of the drugs on the left.
labelsGOterms = np.array(data_array.columns)
labelsDrugs = np.array(data_array.index)

# Upper dendrogram (GO terms), drawn on the secondary y axis
fig = ff.create_dendrogram(data_array.T, orientation='bottom',labels=labelsGOterms)
for trace in fig['data']:
    trace['yaxis'] = 'y2'

# Side dendrogram (drugs), drawn on the secondary x axis and added to the figure
dendro_side = ff.create_dendrogram(data_array, orientation='right',labels=labelsDrugs)
for trace in dendro_side['data']:
    trace['xaxis'] = 'x2'
for trace in dendro_side['data']:
    fig.add_trace(trace)

# Heatmap: rows and columns follow the leaf order of the two dendrograms and the
# cells are placed at the dendrogram tick positions
column_order = fig['layout']['xaxis']['ticktext']
row_order = dendro_side['layout']['yaxis']['ticktext']
heatmap = [
    go.Heatmap(
        x = fig['layout']['xaxis']['tickvals'],
        y = dendro_side['layout']['yaxis']['tickvals'],
        z = data_array.loc[row_order,column_order],
        zmin=0, zmax=1
    )
]
for trace in heatmap:
    fig.add_trace(trace)

fig['layout']['yaxis']['ticktext'] = dendro_side['layout']['yaxis']['ticktext']
fig['layout']['yaxis']['tickvals'] = np.asarray(dendro_side['layout']['yaxis']['tickvals'])

# Layout: figure size plus the four axes, which share the same "no decoration" settings
plain_axis = {'mirror': False,
              'showgrid': False,
              'showline': False,
              'zeroline': False,
              'ticks':""}
fig.update_layout({'width':800, 'height':1100,
                   'showlegend':False, 'hovermode': 'closest',
                   })
# heatmap x axis (keeps its tick labels)
fig.update_layout(xaxis={'domain': [.15, 1], **plain_axis})
# side dendrogram x axis
fig.update_layout(xaxis2={'domain': [0, .15], 'showticklabels': False, **plain_axis})
# heatmap y axis
fig.update_layout(yaxis={'domain': [0, .85], 'showticklabels': False, **plain_axis})
# upper dendrogram y axis
fig.update_layout(yaxis2={'domain':[.825, .975], 'showticklabels': False, **plain_axis})

# Plot!
fig.show()

Dendrograms - Most commonly created as an output of hierarchical clustering. - The key to interpreting them is to focus on the height at which any two objects are joined together: the lower the link that joins two rows, the more similar they are. - They give an idea of the number of clusters (but cannot determine that number).

from scipy.stats import ranksums 
# For every GO-term node, compare the attribution scores of the drugs annotated
# to it (slim == 1) with those of the remaining drugs (slim == 0) using a
# two-sided Wilcoxon rank-sum test. A one-sided variant would pass
# alternative="greater" to ranksums.
GO_terms_wilcox = {}    # node -> rank-sum p-value
number_ones_w = {}      # node -> sum of its slim row
sum_attribution = {}    # node -> sum of its attribution row
terms_all_genes = {}    # node -> term_size_map entry of the term
for goterm in slim_matrix.index:
    goterm_drugs = slim_matrix.loc[[goterm]].T
    goterm_drugs.columns = ["slim"]
    score = attribution_data_all.loc[[goterm]].T
    score.columns = ["score"]
    slim_score = goterm_drugs.join(score)
    slim_score.columns = ["slim", "score"]

    number_ones_w[goterm] = sum(goterm_drugs.values.flatten())
    sum_attribution[goterm] = sum(score.values.flatten())
    # goterm[:-2] drops the two-character suffix of the node name (e.g. "_1").
    terms_all_genes[goterm] = term_size_map[goterm[:-2]]

    annotated_scores = slim_score.loc[slim_score["slim"] == 1, "score"]
    other_scores = slim_score.loc[slim_score["slim"] == 0, "score"]
    GO_terms_wilcox[goterm] = ranksums(annotated_scores, other_scores).pvalue

GO_terms_wilcox_df = (
    pd.Series(GO_terms_wilcox, name="score")
    .rename_axis("goterm")
    .to_frame()
    .dropna()
)
GO_terms_wilcox_df.sort_values(by=["score"], ascending=True)
GO_terms_wilcox_df

Percentage lower than 0.05…

# Share of GO-term nodes with a rank-sum p-value below 0.05 (shown in the title).
significant_terms = GO_terms_wilcox_df[GO_terms_wilcox_df["score"] < 0.05]
share_significant = 100 * len(significant_terms) / len(GO_terms_wilcox_df)
perc = f"{round(share_significant, 2)}%"
sns.set(rc={'figure.figsize': (5, 3)})
histogram(
    GO_terms_wilcox_df["score"],
    CB_color_cycle[0],
    'Wilcox GO terms - ' + perc,
    "Number of GO terms",
    n_bins=200,
)

Draw

# Inspect a single GO-term node: attribution scores of annotated (slim == 1)
# versus non-annotated (slim == 0) drugs, as a boxplot and as density curves.
goterm="GO:1903077_1"
slim = slim_matrix.loc[[goterm]].T
slim.columns = ["slim"]
score = attribution_data_annotated.loc[[goterm]].T
score.columns = ["score"]
# NOTE(review): this overwrites the score labels positionally with the slim
# labels; it assumes both tables list the drugs in the same order - confirm.
score.index = slim.index
plot = slim.join(score)
plot.columns = ["slim","score"]
ax = sns.boxplot(x="slim", y="score", data=plot )
# Plotting the KDE Plot
# `fill=True` replaces the deprecated `shade=True` keyword of sns.kdeplot.
sns.kdeplot(plot.loc[plot["slim"] == 1]["score"], color='orange', fill=True, label=1)
sns.kdeplot(plot.loc[plot["slim"] == 0]["score"], color='blue', fill=True, label=0)
plt.xlabel('Attribution')
plt.ylabel('Probability Density')
slim.sum()
len(plot)

This test is not meaningful unless the absolute value of the attribution is used (because a term can affect the attribution either positively or negatively).

from scipy.stats import ranksums 
drugs_wilcox = {}   # drug -> two-sided rank-sum p-value

# Per node: number of incoming edges of its GO term in dG, and its level
# (level_number minus one). node[:-2] drops the two-character node suffix.
number_parents = {}
levels = {}
for node in slim_matrix.index:
    term = node[:-2]
    number_parents[node] = sum(1 for _ in dG.in_edges(term))
    levels[node] = level_number[term] - 1
levels = pd.DataFrame.from_dict(levels, orient='index')
number_parents = pd.DataFrame.from_dict(number_parents, orient='index')

# Per drug: rank-sum test of the scores of annotated nodes (slim == 1) against
# the other nodes (slim == 0). A one-sided variant would pass
# alternative="greater" to ranksums.
for drug in slim_matrix.columns:
    slim_score = pd.concat([slim_matrix[drug], attribution_data_all[drug], levels], axis=1)
    slim_score.columns = ["slim", "score", "levels"]
    annotated_scores = slim_score.loc[slim_score["slim"] == 1, "score"]
    other_scores = slim_score.loc[slim_score["slim"] == 0, "score"]
    drugs_wilcox[drug] = ranksums(annotated_scores, other_scores).pvalue

# NOTE: the index keeps the name 'goterm' although it holds drug names.
drugs_wilcox_df = (
    pd.Series(drugs_wilcox, name="score")
    .rename_axis("goterm")
    .to_frame()
    .dropna()
)
drugs_wilcox_df.sort_values(by=["score"], ascending=True)
len(drugs_wilcox_df[drugs_wilcox_df["score"]<0.05])
len(drugs_wilcox_df)
significant_drugs = drugs_wilcox_df[drugs_wilcox_df["score"] < 0.05]
perc = f"{round(100 * (len(significant_drugs) / len(drugs_wilcox_df)), 2)}%"
sns.set(rc={'figure.figsize': (4, 4)})
histogram(
    drugs_wilcox_df["score"],
    CB_color_cycle[1],
    'Wilcox Drugs - ' + perc,
    "Number of Drugs",
    n_bins=10,
)

Draw

# Inspect one drug: scores of annotated vs. non-annotated nodes, overall,
# per level and per number of parents.
drug="selumetinib"
plot = pd.concat(
    [slim_matrix[drug], attribution_data_all[drug], number_parents, levels],
    axis=1,
)
plot.columns = ["slim", "score", "parents", "levels"]

sns.set(rc={'figure.figsize': (4, 4)})
ax = sns.boxplot(x="slim", y="score", data=plot, showfliers=True)
ranksums(plot.loc[plot["slim"] == 1, "score"], plot.loc[plot["slim"] == 0, "score"]).pvalue

sns.set(rc={'figure.figsize': (15, 8)})
ax = sns.boxplot(x="levels", y="score", hue="slim", data=plot, order=list(range(7, -1, -1)))
ax = ax.set(xlabel='General terms                                                    -                                                          Specific terms')

# Rank-sum p-value within each level 0..7.
vals = []
for level in range(8):
    plot_level = plot[plot["levels"] == level]
    pvalue = ranksums(
        plot_level.loc[plot_level["slim"] == 1, "score"],
        plot_level.loc[plot_level["slim"] == 0, "score"],
    ).pvalue
    vals.append(pvalue)
    print(f"P-value level {level}: {pvalue!s}")


from scipy.stats import combine_pvalues
# delete nans, some levels have only 1 class
cleanedvals = [p for p in vals if not np.isnan(p)]
combine_pvalues(cleanedvals, method='fisher', weights=None)
sns.set(rc={'figure.figsize': (20, 8)})
ax = sns.boxplot(x="parents", y="score", hue="slim", data=plot)
len(plot.loc[plot["slim"] == 1])/6
len(plot.loc[plot["slim"] == 0])/6

Wilcox by layers and add by fisher

from scipy.stats import ranksums, combine_pvalues

# Per drug: one rank-sum test per group of nodes, combined into a single
# p-value with Fisher's method.
# NOTE(review): the groups are defined by the "parents" column with the fixed
# range 1..26, although the heading and variable names speak of layers/levels;
# confirm which grouping is intended.
drugs_wilcox_levels = {}
for drug in slim_matrix.columns:
    slim_score = pd.concat(
        [slim_matrix[drug], attribution_data_all[drug], number_parents, levels],
        axis=1,
    )
    slim_score.columns = ["slim", "score", "parents", "levels"]

    vals = [
        ranksums(
            group.loc[group["slim"] == 1, "score"],
            group.loc[group["slim"] == 0, "score"],
        ).pvalue
        for group in (slim_score[slim_score["parents"] == k] for k in range(1, 27))
    ]

    # delete nans, some groups have only 1 class
    cleanedvals = [p for p in vals if not np.isnan(p)]
    s, drugs_wilcox_levels[drug] = combine_pvalues(cleanedvals, method='fisher', weights=None)
    print(drug)
slim_score
# NOTE: the index keeps the name 'goterm' although it holds drug names.
drugs_wilcox_levels_df = (
    pd.Series(drugs_wilcox_levels, name="score")
    .rename_axis("goterm")
    .to_frame()
    .dropna()
)
drugs_wilcox_levels_df.sort_values(by=["score"], ascending=True)
sns.set(rc={'figure.figsize': (4, 4)})
significant_drugs = drugs_wilcox_levels_df[drugs_wilcox_levels_df["score"] < 0.05]
perc = f"{round(100 * (len(significant_drugs) / len(drugs_wilcox_levels_df)), 2)}%"
histogram(
    drugs_wilcox_levels_df["score"],
    CB_color_cycle[1],
    'Wilcox Drugs - ' + perc,
    "Number of Drugs",
    n_bins=10,
)
len(slim_matrix.columns)

SVM

from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn import svm
# Result tables with one row per GO term and one column per drug, filled in by
# the cross-validation loop below.
# The two label tables hold 0/1 values and stay integer-typed.
slim_matrix_single_neuron = pd.DataFrame(0, index=sparseGO_terms, columns=slim_matrix.columns)  # true labels
preds_svm_matrix = pd.DataFrame(0, index=sparseGO_terms, columns=slim_matrix.columns)  # predicted labels
# These three receive real-valued scores, so they are created as float tables;
# writing floats into integer columns relies on silent upcasting, which pandas
# has deprecated.
platt_matrix = pd.DataFrame(0.0, index=sparseGO_terms, columns=slim_matrix.columns)  # predict_proba values
distance_matrix = pd.DataFrame(0.0, index=sparseGO_terms, columns=slim_matrix.columns)  # decision_function values
delta_logits_matrix = pd.DataFrame(0.0, index=sparseGO_terms, columns=slim_matrix.columns)  # posterior minus prior logits

Create models

Regression models…

# Dictionaries to store results
# Dictionaries to store results (GO term -> metric over all held-out drugs)
GO_terms_auc_svm = {}
GO_terms_aupr_svm = {}
GO_terms_precision_svm = {}
GO_terms_auc_delta_logits = {}

# Fit one RBF-kernel SVM per GO term and evaluate it with a stratified
# 4-fold cross-validation over the drugs.
for goterm in sparseGO_terms:
    goterm_drugs = slim_matrix.loc[[goterm+"_"+str(1)]].values.flatten()

    # Terms with 8 or fewer annotated drugs are skipped.
    # NOTE(review): the original comment read "at least 2 annotated drugs in
    # each group", which `< 8` would also allow; `<= 8` is kept so that the
    # set of modelled terms does not change.
    if sum(goterm_drugs) <= 8:
        continue

    # store results of each cross validation fold
    all_y_test = []
    all_y_pred_proba = []
    all_y_pred_proba_dis = []
    all_y_pred = []
    all_y_names = []

    # Features: the attribution of the term's six nodes (<term>_1 .. <term>_6).
    list_nodes = [goterm+"_"+str(node) for node in range(1, 7)]
    score = attribution_data_annotated.loc[list_nodes].T
    # Scale each feature by its standard deviation; NaN results are set to 0.
    # The original author notes that this scaling strongly affects the results.
    score_mod = score.divide(score.std()).fillna(0)

    # Separate drugs in 4 groups for cross-validation -----

    # Split data in 2 groups (with train_test_split in order to have 0s in both groups)
    X_part1,X_part2,y_part1,y_part2=train_test_split(score_mod,goterm_drugs,test_size=0.50,random_state=0,stratify=goterm_drugs)
    # Split data again in 4 groups (split data previously split)
    X_group1,X_group2,y_group1,y_group2=train_test_split(X_part1,y_part1,test_size=0.50,random_state=0,stratify=y_part1)
    X_group3,X_group4,y_group3,y_group4=train_test_split(X_part2,y_part2,test_size=0.50,random_state=0,stratify=y_part2)

    # Explicit list of folds (replaces the former globals()["X_group<n>"] lookups).
    folds = [
        (X_group1, y_group1),
        (X_group2, y_group2),
        (X_group3, y_group3),
        (X_group4, y_group4),
    ]

    for test_idx, (X_test, y_test) in enumerate(folds):
        # Use the other 3 groups for training, in ascending fold order.
        train_folds = [fold for idx, fold in enumerate(folds) if idx != test_idx]
        X_train = pd.concat([X_fold for X_fold, _ in train_folds])
        y_train = np.concatenate([y_fold for _, y_fold in train_folds])

        #gamma = 1/(X_train.shape[1]*X_train.to_numpy().var())
        gamma = "scale"
        C=1

        svm_model = svm.SVC(C=C,gamma=gamma, kernel='rbf',
                           class_weight="balanced",
                            tol=0.001,
                            probability=True,
                            random_state=1234)

        # fit the model with data
        svm_model.fit(X_train,y_train)
        y_pred=svm_model.predict(X_test)
        y_pred_proba = svm_model.predict_proba(X_test)[:, 1] # platt values
        # Real-valued SVM output for each sample (its signed distance from the
        # separating hyperplane).
        y_pred_proba_dis = svm_model.decision_function(X_test)

        all_y_test.append(y_test)
        all_y_pred_proba.append(y_pred_proba)
        all_y_pred_proba_dis.append(y_pred_proba_dis)
        all_y_pred.append(y_pred)
        all_y_names.append(X_test.index)

    # Pool the held-out predictions of the 4 folds.
    all_y_test = np.concatenate(all_y_test)
    all_y_pred_proba = np.concatenate(all_y_pred_proba)
    all_y_pred_proba_dis = np.concatenate(all_y_pred_proba_dis)
    all_y_names = np.concatenate(all_y_names)
    all_y_pred = np.concatenate(all_y_pred)

    # Delta logits: logit of the predicted probability minus the logit of the
    # fraction of annotated drugs.
    # NOTE(review): a probability of exactly 0 or 1 yields an infinite logit,
    # which roc_auc_score rejects - consider clipping if that ever occurs.
    percentage_go_annotations = sum(all_y_test)/len(all_y_test)
    logits_apriori=np.log(percentage_go_annotations/(1-percentage_go_annotations))
    logits_apost= np.log(all_y_pred_proba/(1-all_y_pred_proba))
    delta_logits = logits_apost-logits_apriori

    platt_matrix.loc[goterm,all_y_names] = all_y_pred_proba
    distance_matrix.loc[goterm,all_y_names] = all_y_pred_proba_dis
    slim_matrix_single_neuron.loc[goterm,all_y_names] = all_y_test
    preds_svm_matrix.loc[goterm,all_y_names] = all_y_pred

    delta_logits_matrix.loc[goterm,all_y_names] = delta_logits

    GO_terms_auc_delta_logits[goterm] = metrics.roc_auc_score(all_y_test, delta_logits)
    GO_terms_auc_svm[goterm] = metrics.roc_auc_score(all_y_test, all_y_pred_proba)

    precision, recall, thresholds = metrics.precision_recall_curve(all_y_test, all_y_pred_proba)
    GO_terms_aupr_svm[goterm] = metrics.auc(recall, precision)
    GO_terms_precision_svm[goterm] = metrics.precision_score(all_y_test, all_y_pred)
# AUC-ROC per GO term (done with platt values), as a one-column table.
GO_terms_auc_svm_df = (
    pd.Series(GO_terms_auc_svm, name="auc")
    .rename_axis("goterm")
    .to_frame()
    .dropna()
)
GO_terms_auc_svm_df.sort_values(by=["auc"], ascending=False).head()
auc
goterm
GO:0036289 0.999708
GO:0060440 0.994743
GO:0042149 0.971292
GO:1902455 0.969545
GO:0001556 0.965979
print(f"There are {len(GO_terms_auc_svm_df)} svm models.")
There are 939 svm models.
# only keep goterms that have a model (rows of skipped terms are dropped)
modelled_terms = list(GO_terms_auc_svm_df.index)
platt_matrix = platt_matrix.loc[modelled_terms, :]
distance_matrix = distance_matrix.loc[modelled_terms, :]
slim_matrix_single_neuron = slim_matrix_single_neuron.loc[modelled_terms, :]
preds_svm_matrix = preds_svm_matrix.loc[modelled_terms, :]
delta_logits_matrix = delta_logits_matrix.loc[modelled_terms, :]

AUC histogram

# Histogram of the per-GO-term AUC values; bars whose left edge lies above the
# 0.69 cut-off are highlighted and their share is printed on the plot.
sns.set(rc={'figure.figsize': (10, 6)})
fig, ax = plt.subplots()
n_above_cutoff = len(GO_terms_auc_svm_df[GO_terms_auc_svm_df["auc"] > 0.69])
perc = str(round(100 * n_above_cutoff / len(GO_terms_auc_svm_df), 2)) + "%"
N, bins, patches = plt.hist(GO_terms_auc_svm_df, color=CB_color_cycle[6], bins=50, linewidth=0.1)

for left_edge, patch in zip(bins[:-1], patches):
    if left_edge > 0.69:
        patch.set_facecolor(CB_color_cycle[2])

plt.yticks(fontsize=16)
plt.xticks(fontsize=16)

# Hide the top, right and left borders; keep a light-gray bottom border.
for side in ('top', 'right', 'left'):
    ax.spines[side].set_visible(False)
ax.spines['bottom'].set_color('#DDDDDD')

# No tick marks on the x axis, tick marks kept on the y axis.
ax.tick_params(bottom=False, left=True)

# Light-gray horizontal grid drawn behind the bars, no vertical grid.
ax.set_axisbelow(True)
ax.yaxis.grid(True, color='#EEEEEE')
ax.xaxis.grid(False)

plt.xlabel("AUC value", fontsize=20)
plt.ylabel("Number of GO term models", fontsize=20)

# Legend with a single hand-made entry for the highlighted bars.
colors2 = {'GO term models with AUC>=0.7': CB_color_cycle[2]}
labels = list(colors2)
handles = [plt.Rectangle((0, 0), 1, 1, color=shade) for shade in colors2.values()]
plt.legend(handles, labels, fontsize=20, loc="lower left", bbox_to_anchor=(0.35, -0.35))
plt.text(0.71, 8, str(perc), fontsize=20, color='#333333')
plt.title("Overall performance of the models using expression", fontsize=24)
# Author's note: this works best with the plain sum of the attributions.
fig.tight_layout()
fig.savefig(resultsdir+'modelsAUCsvm.png', transparent=True)

AUC waterfall plot

# Waterfall plot: one bar per SVM model, sorted by decreasing AUC-ROC.
# Models with AUC >= 0.69 get a blue outline, the rest a gray one.
GO_terms_auc_svm_df = GO_terms_auc_svm_df.sort_values(by=["auc"], ascending=False)
plt.rcParams['figure.figsize'] = (12, 9)
drugs = GO_terms_auc_svm_df.index   # bar positions: the GO terms (name kept from the original)
rhos = GO_terms_auc_svm_df["auc"]   # bar heights: the AUC values

# Percentage of models above the cut-off, printed on the plot.
percentage = round((sum(rhos>0.69)/len(rhos))*100,1)

fig, ax = plt.subplots()
colors = ['#C9C9C9' if (x < 0.69) else "#6492CA" for x in rhos ]
ax.bar(
    x=drugs,
    height=rhos,
    edgecolor=colors,
    linewidth=2
)
plt.xticks([])
plt.yticks(fontsize=28)

# Remove all four spines (figure borders).
for side in ('top', 'right', 'left', 'bottom'):
    ax.spines[side].set_visible(False)

# Remove the ticks as well.
ax.tick_params(bottom=False, left=False)

# No grid lines at all.
ax.set_axisbelow(False)
ax.yaxis.grid(False)
ax.xaxis.grid(False)

# Axis labels; `labelpad` adds space between the label and the tick labels.
ax.set_xlabel('SVM models', labelpad=-30, color='#333333',fontsize=50)
ax.set_ylabel('AUC-ROC value', labelpad=15, color='#333333',fontsize=50)

# The former legend scaffolding (colors2/labels/handles) was removed: the
# legend call was commented out and its label was copied from another plot.
plt.text(77, 0.32, str(percentage)+"%", fontsize=60,color='#000000')

plt.ylim((-0.1,1.1))
# Make the chart fill out the figure better.
fig.tight_layout()
fig.savefig(resultsdir+'WaterfallModelsSVM.png', transparent=True)

AUC boxplot by parents

# Add, for every modelled GO term, its number of incoming edges in dG
# ("parents") and its level (level_number minus one).
number_parents = {
    term: sum(1 for _ in dG.in_edges(term)) for term in GO_terms_auc_svm_df.index
}
levels = {term: level_number[term] - 1 for term in GO_terms_auc_svm_df.index}
levels = pd.DataFrame.from_dict(levels, orient='index')
number_parents = pd.DataFrame.from_dict(number_parents, orient='index')

GO_terms_auc_svm_df = pd.concat([GO_terms_auc_svm_df, levels, number_parents], axis=1)
GO_terms_auc_svm_df.columns = ["auc", "levels", "parents"]
GO_terms_auc_svm_df.head()
auc levels parents
GO:0000077 0.284021 1 3
GO:0045737 0.835954 0 8
GO:0000082 0.732331 2 4
GO:1900087 0.593301 0 10
GO:2000134 0.865329 1 9
import plotly.express as px

# Box plot of the AUC-ROC values grouped by level of the GO hierarchy.
# (The unused `px.data.tips()` sample-data load was removed.)
c = ['#E8384F', '#FD817D', '#FDAE33',
         '#EECC16', '#A4C61A', '#37A862',"#208EA3","#3B6EAB"]

fig = px.box(GO_terms_auc_svm_df, x="levels", y="auc",
             color="levels",
            color_discrete_sequence=c,
             width =600,
             height=400,
              template="simple_white",
              labels=dict(levels="Level of GO hierarchy", auc="AUC-ROC")
            )
fig.update_traces(width=0.9)

fig.add_shape( # add a horizontal "target" line at AUC = 0.7
    type="line", line_color="salmon", line_width=3, opacity=1, line_dash="dot",
    x0=0, x1=1, xref="paper", y0=0.7, y1=0.7, yref="y"
)


fig.update_layout(
   # The bold tag is now closed properly (was "<b> ... <b>").
   title=dict(text="<b> AUC value grouped by level of GO hierarchy </b>",
             x=0.5,
             y=0.9,
              font=dict(size=18),
              xanchor='center',
              yanchor='top'),
    xaxis=dict(ticks="", showticklabels=False, showgrid=False, zeroline=False),
    yaxis=dict(ticks="", showticklabels=True, showgrid=True, zeroline=False),
    legend=dict(x=1.1, y=1, orientation="v",font=dict(size=11)),
    paper_bgcolor='rgba(0,0,0,0)',
    font=dict(family='Roboto',color= "#36382E",size=15)
    )

fig.show()

TOP 15 PREDICTED GO TERMS

# The 15 GO terms with the highest AUC.
auc_descending = GO_terms_auc_svm_df.sort_values(by=["auc"], ascending=False)
top15goterms = np.array(auc_descending.index[:15])

Get Top GO term names

# Look up the names of the 15 best GO terms; real_go_info is keyed by node
# name, i.e. the term followed by "_1".
top15goterms_1 = [goterm+"_"+str(1) for goterm in top15goterms]
# Work on an explicit copy: assigning a column on a filtered slice of
# real_go_info triggers pandas' SettingWithCopyWarning.
real_go_info_mod_best = real_go_info[real_go_info.GO_term.isin(top15goterms_1)].copy()
real_go_info_mod_best["GO_term"] = real_go_info_mod_best["GO_term"].str.replace("_1", "", regex=False)
top15goterms_auc = GO_terms_auc_svm_df.sort_values(by=["auc"], ascending=False)[0:15].reset_index()
top15goterms_auc.columns=["GO_term","auc","levels","parents"]
top15goterms_auc = top15goterms_auc.merge(real_go_info_mod_best[real_go_info_mod_best["GO_term"].isin(top15goterms)], on="GO_term")
top15goterms_auc
GO_term auc levels parents Name layer_number
0 GO:0036289 0.999708 0 2 Peptidyl-serine autophosphorylation (1) 0.0
1 GO:0060440 0.994743 0 4 Trachea formation (1) 0.0
2 GO:0042149 0.971292 0 1 Cellular response to glucose starvation (1) 0.0
3 GO:1902455 0.969545 0 2 Negative regulation of stem cell population maintenance (1) 0.0
4 GO:0001556 0.965979 0 6 Oocyte maturation (1) 0.0
5 GO:0045636 0.955115 0 6 Positive regulation of melanocyte differentiation (1) 0.0
6 GO:0010750 0.955000 0 4 Positive regulation of nitric oxide mediated signal transduction (1) 0.0
7 GO:0060020 0.949434 0 1 Bergmann glial cell differentiation (1) 0.0
8 GO:1902042 0.945804 0 4 Negative regulation of extrinsic apoptotic signaling pathway via death domain receptors (1) 0.0
9 GO:1902236 0.941667 0 12 Negative regulation of endoplasmic reticulum stress-induced intrinsic apoptotic signaling pathway (1) 0.0
10 GO:0070059 0.936432 1 2 Intrinsic apoptotic signaling pathway in response to endoplasmic reticulum stress (1) 1.0
11 GO:0051453 0.935521 1 2 Regulation of intracellular ph (1) 1.0
12 GO:0042659 0.931364 0 3 Regulation of cell fate specification (1) 0.0
13 GO:0006360 0.930046 2 7 Transcription by rna polymerase i (1) 2.0
14 GO:0006959 0.921730 2 2 Humoral immune response (1) 2.0

WORST 15 PREDICTED GO TERMS

# The 15 GO terms with the lowest AUC.
auc_ascending = GO_terms_auc_svm_df.sort_values(by=["auc"], ascending=True)
worst15goterms = np.array(auc_ascending.index[:15])

Get Worst GO term names

# Look up the names of the 15 worst GO terms; real_go_info is keyed by node
# name, i.e. the term followed by "_1".
worst15goterms_1 = [goterm+"_"+str(1) for goterm in worst15goterms]
# Work on an explicit copy: assigning a column on a filtered slice of
# real_go_info triggers pandas' SettingWithCopyWarning.
real_go_info_mod_worst = real_go_info[real_go_info.GO_term.isin(worst15goterms_1)].copy()
real_go_info_mod_worst["GO_term"] = real_go_info_mod_worst["GO_term"].str.replace("_1", "", regex=False)
worst15goterms_auc = GO_terms_auc_svm_df.sort_values(by=["auc"], ascending=True)[0:15].reset_index()
worst15goterms_auc.columns=["GO_term","auc","levels","parents"]
# Displayed only: the merged table is not stored back into worst15goterms_auc.
worst15goterms_auc.merge(real_go_info_mod_worst[real_go_info_mod_worst["GO_term"].isin(worst15goterms)], on="GO_term")
GO_term auc levels parents Name layer_number
0 GO:0000077 0.284021 1 3 Dna damage checkpoint signaling (1) 1.0
1 GO:0006869 0.299648 3 2 Lipid transport (1) 3.0
2 GO:0051302 0.314545 1 2 Regulation of cell division (1) 1.0
3 GO:0016485 0.318636 3 5 Protein processing (1) 3.0
4 GO:0019722 0.322272 2 1 Calcium-mediated signaling (1) 2.0
5 GO:0046854 0.326276 1 2 Phosphatidylinositol phosphate biosynthetic process (1) 1.0
6 GO:0060740 0.331825 1 6 Prostate gland epithelium morphogenesis (1) 1.0
7 GO:0060444 0.347273 1 8 Branching involved in mammary gland duct morphogenesis (1) 1.0
8 GO:0006919 0.352725 1 3 Activation of cysteine-type endopeptidase activity involved in apoptotic process (1) 1.0
9 GO:0032436 0.353421 1 14 Positive regulation of proteasomal ubiquitin-dependent protein catabolic process (1) 1.0
10 GO:0055119 0.353947 1 1 Relaxation of cardiac muscle (1) 1.0
11 GO:0001892 0.355979 1 5 Embryonic placenta development (1) 1.0
12 GO:0031295 0.364518 0 8 T cell costimulation (1) 0.0
13 GO:0046620 0.365476 1 3 Regulation of organ growth (1) 1.0
14 GO:0008361 0.367423 2 1 Regulation of cell size (1) 2.0

AUPR histogram

# Area under the precision-recall curve per GO term, as a one-column table.
GO_terms_aupr_svm_df = (
    pd.Series(GO_terms_aupr_svm, name="aupr")
    .rename_axis("goterm")
    .to_frame()
    .dropna()
)
GO_terms_aupr_svm_df.sort_values(by=["aupr"], ascending=False).head()
aupr
goterm
GO:0036289 0.996209
GO:0006807 0.945077
GO:0050896 0.921869
GO:0043170 0.909722
GO:0009058 0.900903
# Add, for every modelled GO term, its number of incoming edges in dG
# ("parents") and its level (level_number minus one).
number_parents = {
    term: sum(1 for _ in dG.in_edges(term)) for term in GO_terms_aupr_svm_df.index
}
levels = {term: level_number[term] - 1 for term in GO_terms_aupr_svm_df.index}
levels = pd.DataFrame.from_dict(levels, orient='index')
number_parents = pd.DataFrame.from_dict(number_parents, orient='index')

GO_terms_aupr_svm_df = pd.concat([GO_terms_aupr_svm_df, levels, number_parents], axis=1)
GO_terms_aupr_svm_df.columns = ["aupr", "levels", "parents"]
# Box plot of the AUPR values grouped by level of the GO hierarchy, also
# written to disk as a PNG.
# (The unused `px.data.tips()` sample-data load was removed.)
c = ['#E8384F', '#FD817D', '#FDAE33',
         '#EECC16', '#A4C61A', '#37A862',"#208EA3","#3B6EAB"]

fig = px.box(GO_terms_aupr_svm_df, x="levels", y="aupr",
             color="levels",
            color_discrete_sequence=c,
             width =600,
             height=400,
              template="simple_white",
              labels=dict(levels="Level of GO hierarchy", aupr="AUPR")
            )
fig.update_traces(width=0.9)

fig.add_shape( # add a horizontal "target" line at 0.7
    type="line", line_color="salmon", line_width=3, opacity=1, line_dash="dot",
    x0=0, x1=1, xref="paper", y0=0.7, y1=0.7, yref="y"
)


fig.update_layout(
   # The bold tag is now closed properly (was "<b> ... <b>").
   title=dict(text="<b> AUPR value grouped by level of GO hierarchy </b>",
             x=0.5,
             y=0.9,
              font=dict(size=18),
              xanchor='center',
              yanchor='top'),
    xaxis=dict(ticks="", showticklabels=False, showgrid=False, zeroline=False),
    yaxis=dict(ticks="", showticklabels=True, showgrid=True, zeroline=False),
    legend=dict(x=1.1, y=1, orientation="v",font=dict(size=11)),
    paper_bgcolor='rgba(0,0,0,0)',
    font=dict(family='Roboto',color= "#36382E",size=15)
    )

fig.show()
pio.write_image(fig, resultsdir+"AUPR_levels.png", width=600, height=400,scale=8)

Example prediction

def f2(goterm):
    """Return `goterm` unchanged.

    Used as the callback of the GO-term combobox widget, so that the widget's
    `result` is the selected value.
    """
    return goterm
# Combobox listing the modelled GO terms, best AUC first.
go_options = list(GO_terms_auc_svm_df.sort_values(by=["auc"], ascending=False).index)
combobox_go = interactive(f2, goterm=widgets.Combobox(options=go_options))

Choose GO term to study…

display(combobox_go)
selected_go = combobox_go.result

# Rows of the result tables for the selected GO term (one value per drug).
go_truth = slim_matrix_single_neuron.loc[selected_go]
go_platt = platt_matrix.loc[selected_go]
go_delta = delta_logits_matrix.loc[selected_go]
go_preds = preds_svm_matrix.loc[selected_go]

# ROC curve and AUC from the Platt probabilities
plt.rcParams['figure.figsize'] = (4, 2)
fpr, tpr, _ = metrics.roc_curve(go_truth, go_platt)
auc = metrics.roc_auc_score(go_truth, go_platt)
plt.plot(fpr, tpr, label=f"data 1, auc={auc!s}")
plt.legend(loc=4)
plt.show()

plot = pd.concat([go_truth, go_platt], axis=1)
plot.columns = ["slim", "probability"]
ax = sns.boxplot(x="slim", y="probability", data=plot, showfliers=False)

# ROC curve and AUC from the delta logits
fpr, tpr, _ = metrics.roc_curve(go_truth, go_delta)
auc = metrics.roc_auc_score(go_truth, go_delta)
plt.plot(fpr, tpr, label=f"data 1, auc={auc!s}")
plt.legend(loc=4)
plt.show()

plot = pd.concat([go_truth, go_delta], axis=1)
plot.columns = ["slim", "probability"]
ax = sns.boxplot(x="slim", y="probability", data=plot, showfliers=False)

# Confusion matrix of the hard predictions
plt.rcParams['figure.figsize'] = (2, 2)
metrics.ConfusionMatrixDisplay.from_predictions(go_truth, go_preds)
plt.grid(visible=None)

print("Accuracy:", metrics.accuracy_score(go_truth, go_preds))
print("Precision:", metrics.precision_score(go_truth, go_preds))
print("Recall:", metrics.recall_score(go_truth, go_preds))  # TP / (TP+FN)
# `auc` holds the delta-logit AUC computed last.
print("AUC with score:", auc)
Accuracy: 0.9782608695652174
Precision: 0.7894736842105263
Recall: 0.9375
AUC with score: 0.9947429906542057

TN - FP

FN - TP

# Precision-recall curve of the selected GO term.
# The curve is built from the Platt probabilities (platt_matrix), as in the
# per-term and per-drug AUPR computations elsewhere in this notebook; the hard
# 0/1 predictions used before give only a single operating point.
plt.rcParams['figure.figsize'] = (4, 2)
precision, recall, thresholds = metrics.precision_recall_curve(slim_matrix_single_neuron.loc[selected_go],  platt_matrix.loc[selected_go])
auc_precision_recall = metrics.auc(recall, precision)
plt.plot(recall, precision,label=str(auc_precision_recall))
plt.legend(loc=4)
plt.show()

METRICS drugs

# Per-drug metrics over all modelled GO terms: AUC-ROC and AUPR from the
# Platt probabilities, precision from the hard predictions.
auc_drugs = {}
aupr_drugs = {}
precision_drugs = {}
for drug in slim_matrix_single_neuron.columns:
    y_true = slim_matrix_single_neuron.loc[:, drug]
    # AUC-ROC needs both classes: skip drugs annotated to none of the modelled
    # terms (as before) and also drugs annotated to all of them, for which
    # roc_auc_score would raise.
    if y_true.nunique() < 2:
        continue
    y_proba = platt_matrix.loc[:, drug]
    auc_drugs[drug] = metrics.roc_auc_score(y_true, y_proba)
    precision, recall, thresholds = metrics.precision_recall_curve(y_true, y_proba)
    aupr_drugs[drug] = metrics.auc(recall, precision)
    precision_drugs[drug] = metrics.precision_score(y_true, preds_svm_matrix.loc[:, drug])

# NOTE: the index of these tables keeps the name 'goterm' although it holds
# drug names.
auc_drugs_df = pd.DataFrame(list(auc_drugs.items()),columns = ['goterm','auc']).set_index("goterm")
auc_drugs_df = auc_drugs_df.dropna()

aupr_drugs_df = pd.DataFrame(list(aupr_drugs.items()),columns = ['goterm','aupr']).set_index("goterm")
aupr_drugs_df = aupr_drugs_df.dropna()

precision_drugs_df = pd.DataFrame(list(precision_drugs.items()),columns = ['goterm','precision']).set_index("goterm")
precision_drugs_df = precision_drugs_df.dropna()

AUC histogram drugs

# Histogram of the per-drug AUC values; bars whose left edge lies above 0.7
# are highlighted and their share is printed on the plot.
sns.set(rc={'figure.figsize': (10, 6)})
fig, ax = plt.subplots()
n_above_cutoff = len(auc_drugs_df[auc_drugs_df["auc"] > 0.7])
perc = str(round(100 * n_above_cutoff / len(auc_drugs_df), 2)) + "%"
N, bins, patches = plt.hist(auc_drugs_df, color=CB_color_cycle[6], bins=50, linewidth=0.1)

for left_edge, patch in zip(bins[:-1], patches):
    if left_edge > 0.7:
        patch.set_facecolor(CB_color_cycle[5])

plt.yticks(fontsize=16)
plt.xticks(fontsize=16)

# Hide the top, right and left borders; keep a light-gray bottom border.
for side in ('top', 'right', 'left'):
    ax.spines[side].set_visible(False)
ax.spines['bottom'].set_color('#DDDDDD')

# No tick marks on the x axis, tick marks kept on the y axis.
ax.tick_params(bottom=False, left=True)

# Light-gray horizontal grid drawn behind the bars, no vertical grid.
ax.set_axisbelow(True)
ax.yaxis.grid(True, color='#EEEEEE')
ax.xaxis.grid(False)

plt.xlabel("AUC value", fontsize=20)
plt.ylabel("Number of drugs", fontsize=20)

# Legend with a single hand-made entry for the highlighted bars.
colors2 = {'Drugs with AUC>=0.7': CB_color_cycle[5]}
labels = list(colors2)
handles = [plt.Rectangle((0, 0), 1, 1, color=shade) for shade in colors2.values()]
plt.legend(handles, labels, fontsize=20, loc="lower left", bbox_to_anchor=(0.35, -0.35))
plt.text(0.79, 6, str(perc), fontsize=20, color='#333333')
plt.title("Overall performance by drugs using mutations", fontsize=24)
# Author's note: this works best with the plain sum of the attributions.
fig.tight_layout()
fig.savefig(resultsdir+'drugsAUC.png', transparent=True)

AUC waterfall plot drugs

# Waterfall plot: one bar per drug, sorted by decreasing AUC-ROC.
# Drugs with AUC >= 0.69 get a purple outline, the rest a gray one.
auc_drugs_df = auc_drugs_df.sort_values(by=["auc"], ascending=False)
plt.rcParams['figure.figsize'] = (12, 9)
drugs = auc_drugs_df.index      # bar positions
rhos = auc_drugs_df["auc"]      # bar heights: the AUC values

# Percentage of drugs above the cut-off, printed on the plot.
percentage = round((sum(rhos>0.69)/len(rhos))*100,1)

fig, ax = plt.subplots()
colors = ['#C9C9C9' if (x < 0.69) else "#B678BE" for x in rhos ]
ax.bar(
    x=drugs,
    height=rhos,
    edgecolor=colors,
    linewidth=3
)
plt.xticks([])
plt.yticks(fontsize=28)

# Remove all four spines (figure borders).
for side in ('top', 'right', 'left', 'bottom'):
    ax.spines[side].set_visible(False)

# Remove the ticks as well.
ax.tick_params(bottom=False, left=False)

# No grid lines at all.
ax.set_axisbelow(False)
ax.yaxis.grid(False)
ax.xaxis.grid(False)

# Axis labels; `labelpad` adds space between the label and the tick labels.
ax.set_xlabel('Drugs', labelpad=-30, color='#333333',fontsize=50)
ax.set_ylabel('AUC-ROC value', labelpad=15, color='#333333',fontsize=50)

# The former legend scaffolding (colors2/labels/handles) was removed: the
# legend call was commented out and its label was copied from another plot.
plt.text(77, 0.32, str(percentage)+"%", fontsize=60,color='#000000')

plt.ylim((-0.1,1.1))
# Make the chart fill out the figure better.
fig.tight_layout()
fig.savefig(resultsdir+'WaterfallModelsSVM_drugs.png', transparent=True)

AUPR histogram drugs

# Histogram of the per-drug AUPR values; bars whose left edge lies above 0.69
# are highlighted and their share is used as the title.
sns.set(rc={'figure.figsize': (5, 3)})
n_above_cutoff = len(aupr_drugs_df[aupr_drugs_df["aupr"] > 0.69])
perc = str(round(100 * n_above_cutoff / len(aupr_drugs_df), 2)) + "%"
N, bins, patches = plt.hist(aupr_drugs_df, color=CB_color_cycle[6], bins=50, linewidth=0.1)
for left_edge, patch in zip(bins[:-1], patches):
    if left_edge > 0.69:
        patch.set_facecolor(CB_color_cycle[3])

plt.xlabel("AUPR drugs", fontsize=16)
plt.title(perc, fontsize=16)
Text(0.5, 1.0, '33.62%')

Example drug prediction

def f(drug):
    """Return ``drug`` unchanged.

    Identity callback passed to ``interactive`` together with the drug
    combobox; the selected drug is later read from the widget's ``result``.
    """
    return drug
# Node identifiers to look up in real_go_info: every GO term in the index of
# platt_matrix with the "_1" suffix appended.
predictions_nodes = [goterm + "_1" for goterm in platt_matrix.index]

# add names to go terms
# .copy() turns the filtered rows into an independent frame, so rewriting
# GO_term below does not raise pandas' SettingWithCopyWarning.
real_go_info_svm = real_go_info[real_go_info.GO_term.isin(predictions_nodes)].copy()
# Remove the "_1" marker again (literal match, not a regular expression) so
# GO_term holds the plain GO identifiers.
real_go_info_svm["GO_term"] = real_go_info_svm["GO_term"].str.replace("_1", "", regex=False)

# Combobox listing the drugs in the index of precision_drugs_df, ordered by
# descending "precision".
combobox = interactive(f, drug=widgets.Combobox(options=list(precision_drugs_df.sort_values(by=["precision"], ascending=False).index)))

Choose drug to study…

display(combobox)
# NOTE(review): `result` holds whatever is selected at the moment this line
# runs; re-run after changing the selection in the combobox.
selected_drug_name = combobox.result
sns.set(rc={'figure.figsize':(4,2)})

# ROC curve and AUC for the selected drug: slim_matrix_single_neuron values
# are the labels, platt_matrix values are the scores.
selected_labels = slim_matrix_single_neuron.loc[:, selected_drug_name]
selected_scores = platt_matrix.loc[:, selected_drug_name]
fpr, tpr, _ = metrics.roc_curve(selected_labels, selected_scores)
auc = metrics.roc_auc_score(selected_labels, selected_scores)

plt.plot(fpr, tpr, label="data 1, auc="+str(auc))
plt.legend(loc=4)
plt.show()

# Boxplot of the selected drug's platt_matrix values ("svm score") grouped by
# its slim_matrix_single_neuron values ("slim"); outliers are hidden.
slim_single_column = pd.DataFrame(slim_matrix_single_neuron.loc[:, selected_drug_name])
svm_score_column = pd.DataFrame(platt_matrix.loc[:, selected_drug_name])
plot = pd.concat([slim_single_column, svm_score_column], axis=1)
plot.columns = ["slim","svm score"]
ax = sns.boxplot(data=plot, x="slim", y="svm score", showfliers=False)

# Same kind of plot for the attribution values (multiplied by 1e4), grouped
# by the slim_matrix values; here outliers are drawn.
slim_column = pd.DataFrame(slim_matrix.loc[:, selected_drug_name])
attribution_column = pd.DataFrame(attribution_data_annotated.loc[:, selected_drug_name]*1e4)
plot = pd.concat([slim_column, attribution_column], axis=1)
plot.columns = ["slim","attribution"]
ax = sns.boxplot(data=plot, x="slim", y="attribution", showfliers=True)

# Confusion matrix for the selected drug: rounded slim_matrix_single_neuron
# values as the true labels against the preds_svm_matrix predictions.
metrics.ConfusionMatrixDisplay.from_predictions(
    y_true=slim_matrix_single_neuron.loc[:, selected_drug_name].round(),
    y_pred=preds_svm_matrix.loc[:, selected_drug_name],
)
<sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay at 0x2502863c8b0>

# Classification metrics for the selected drug, comparing the
# slim_matrix_single_neuron labels with the preds_svm_matrix predictions.
drug_true_labels = slim_matrix_single_neuron.loc[:, selected_drug_name]
drug_pred_labels = preds_svm_matrix.loc[:, selected_drug_name]
print("Accuracy:", metrics.accuracy_score(drug_true_labels, drug_pred_labels))
print("Precision:", metrics.precision_score(drug_true_labels, drug_pred_labels))
print("Recall:", metrics.recall_score(drug_true_labels, drug_pred_labels))  # TP / (TP+FN)
# `auc` is the value computed together with the ROC curve.
print("AUC with score:", auc)
Accuracy: 0.663471778487753
Precision: 0.8651026392961877
Recall: 0.5221238938053098
AUC with score: 0.9947429906542057
# THESE ARE THE TEST LOGITS!!
# Two-column table (GO_term, probability) built from the selected drug's
# column of delta_logits_matrix.
# NOTE(review): the column is named "probability" but its values come from
# delta_logits_matrix - confirm the intended name.
selected_delta_logits = delta_logits_matrix.loc[:, selected_drug_name]
train_drug_logs = pd.DataFrame(selected_delta_logits).reset_index()
train_drug_logs.columns = ["GO_term", "probability"]
# Attach the GO term information by joining on GO_term.
train_drug_logs = train_drug_logs.merge(right=real_go_info_svm, on="GO_term")
# Show the table ordered from the highest to the lowest value.
train_drug_logs.sort_values(by="probability", ascending=False)
GO_term probability Name layer_number
578 GO:2000379 2.770126 Positive regulation of reactive oxygen species metabolic process (1) 1.0
253 GO:0043552 2.707570 Positive regulation of phosphatidylinositol 3-kinase activity (1) 0.0
80 GO:0010575 2.282492 Positive regulation of vascular endothelial growth factor production (1) 0.0
633 GO:0051301 2.245231 Cell division (1) 2.0
224 GO:0046777 2.053782 Protein autophosphorylation (1) 1.0
423 GO:1902533 2.014408 Positive regulation of intracellular signal transduction (1) 2.0
458 GO:0035025 1.952270 Positive regulation of rho protein signal transduction (1) 0.0
848 GO:0071670 1.887644 Smooth muscle cell chemotaxis (1) 0.0
348 GO:0006939 1.842256 Smooth muscle contraction (1) 2.0
98 GO:0001932 1.809520 Regulation of protein phosphorylation (1) 4.0
350 GO:0045987 1.783402 Positive regulation of smooth muscle contraction (1) 1.0
45 GO:0001501 1.682999 Skeletal system development (1) 4.0
908 GO:0051899 1.675960 Membrane depolarization (1) 2.0
653 GO:0072593 1.650121 Reactive oxygen species metabolic process (1) 3.0
115 GO:0060312 1.625762 Regulation of blood vessel remodeling (1) 0.0
926 GO:0060020 1.614859 Bergmann glial cell differentiation (1) 0.0
923 GO:0048170 1.598558 Positive regulation of long-term neuronal synaptic plasticity (1) 0.0
99 GO:0001934 1.593723 Positive regulation of protein phosphorylation (1) 3.0
853 GO:0038083 1.588712 Peptidyl-tyrosine autophosphorylation (1) 0.0
713 GO:0035726 1.577983 Common myeloid progenitor cell proliferation (1) 0.0
445 GO:0048008 1.552362 Platelet-derived growth factor receptor signaling pathway (1) 1.0
857 GO:0035584 1.496390 Calcium-mediated signaling using intracellular calcium source (1) 0.0
333 GO:1904019 1.481165 Epithelial cell apoptotic process (1) 1.0
933 GO:0051150 1.474264 Regulation of smooth muscle cell differentiation (1) 1.0
352 GO:0014827 1.453480 Intestine smooth muscle contraction (1) 0.0
218 GO:0006468 1.436484 Protein phosphorylation (1) 5.0
814 GO:0090037 1.416277 Positive regulation of protein kinase c signaling (1) 0.0
894 GO:0048017 1.399588 Inositol lipid-mediated signaling (1) 1.0
506 GO:0007286 1.376021 Spermatid development (1) 1.0
742 GO:0035733 1.371582 Hepatic stellate cell activation (1) 0.0
10 GO:0051403 1.369506 Stress-activated mapk cascade (1) 2.0
702 GO:0048146 1.349525 Positive regulation of fibroblast proliferation (1) 0.0
782 GO:1902042 1.347889 Negative regulation of extrinsic apoptotic signaling pathway via death domain receptors (1) 0.0
8 GO:0000165 1.328354 Mapk cascade (1) 3.0
531 GO:0007585 1.321161 Respiratory gaseous exchange by respiratory system (1) 1.0
492 GO:1905065 1.288165 Positive regulation of vascular associated smooth muscle cell differentiation (1) 0.0
181 GO:0006139 1.261437 Nucleobase-containing compound metabolic process (1) 6.0
670 GO:0031274 1.250440 Positive regulation of pseudopodium assembly (1) 0.0
424 GO:0030513 1.215934 Positive regulation of bmp signaling pathway (1) 0.0
12 GO:0070374 1.209229 Positive regulation of erk1 and erk2 cascade (1) 0.0
593 GO:0050865 1.191004 Regulation of cell activation (1) 5.0
562 GO:0050896 1.177718 Response to stimulus (1) 7.0
24 GO:0007346 1.156784 Regulation of mitotic cell cycle (1) 3.0
640 GO:0071310 1.131852 Cellular response to organic substance (1) 4.0
159 GO:0002548 1.126395 Monocyte chemotaxis (1) 1.0
142 GO:0002443 1.101635 Leukocyte mediated immunity (1) 4.0
141 GO:0050900 1.098856 Leukocyte migration (1) 3.0
583 GO:0032967 1.096638 Positive regulation of collagen biosynthetic process (1) 0.0
49 GO:0001569 1.083831 Branching involved in blood vessel morphogenesis (1) 0.0
437 GO:0007169 1.082604 Transmembrane receptor protein tyrosine kinase signaling pathway (1) 3.0
723 GO:0010921 1.079534 Regulation of phosphatase activity (1) 2.0
105 GO:0045860 1.071511 Positive regulation of protein kinase activity (1) 2.0
421 GO:0045747 1.070015 Positive regulation of notch signaling pathway (1) 0.0
39 GO:0008360 1.064064 Regulation of cell shape (1) 0.0
444 GO:0038084 1.058233 Vascular endothelial growth factor signaling pathway (1) 1.0
833 GO:0010467 1.038215 Gene expression (1) 5.0
365 GO:0090141 1.033165 Positive regulation of mitochondrial fission (1) 0.0
439 GO:0030509 1.027813 Bmp signaling pathway (1) 1.0
553 GO:0034394 1.017983 Protein localization to cell surface (1) 1.0
60 GO:0072210 1.009552 Metanephric nephron development (1) 1.0
427 GO:0046427 1.007946 Positive regulation of receptor signaling pathway via jak-stat (1) 1.0
684 GO:0051770 1.005599 Positive regulation of nitric-oxide synthase biosynthetic process (1) 0.0
309 GO:0034765 1.001361 Regulation of ion transmembrane transport (1) 4.0
655 GO:0008210 0.998843 Estrogen metabolic process (1) 1.0
486 GO:0048484 0.961996 Enteric nervous system development (1) 0.0
809 GO:0014068 0.961267 Positive regulation of phosphatidylinositol 3-kinase signaling (1) 0.0
441 GO:0007259 0.949624 Receptor signaling pathway via jak-stat (1) 2.0
825 GO:0036120 0.941171 Cellular response to platelet-derived growth factor stimulus (1) 0.0
667 GO:0035234 0.934130 Ectopic germ cell programmed cell death (1) 0.0
23 GO:0000278 0.933767 Mitotic cell cycle (1) 4.0
632 GO:1900006 0.924525 Positive regulation of dendrite development (1) 0.0
397 GO:0033627 0.910092 Cell adhesion mediated by integrin (1) 2.0
764 GO:0048701 0.908694 Embryonic cranial skeleton morphogenesis (1) 1.0
624 GO:0010628 0.896611 Positive regulation of gene expression (1) 3.0
862 GO:0035162 0.872366 Embryonic hemopoiesis (1) 1.0
11 GO:0043406 0.860510 Positive regulation of map kinase activity (1) 1.0
74 GO:0001817 0.857088 Regulation of cytokine production (1) 3.0
743 GO:0050918 0.840184 Positive chemotaxis (1) 1.0
829 GO:0010212 0.825924 Response to ionizing radiation (1) 1.0
922 GO:0036324 0.825686 Vascular endothelial growth factor receptor-2 signaling pathway (1) 0.0
895 GO:0070528 0.820011 Protein kinase c signaling (1) 1.0
664 GO:0051092 0.813750 Positive regulation of nf-kappab transcription factor activity (1) 0.0
27 GO:0045840 0.800332 Positive regulation of mitotic nuclear division (1) 1.0
827 GO:0010038 0.794352 Response to metal ion (1) 2.0
592 GO:0045595 0.785394 Regulation of cell differentiation (1) 4.0
263 GO:0010559 0.783904 Regulation of glycoprotein biosynthetic process (1) 1.0
503 GO:0021953 0.781212 Central nervous system neuron differentiation (1) 2.0
674 GO:0071276 0.780750 Cellular response to cadmium ion (1) 0.0
659 GO:0048469 0.780345 Cell maturation (1) 2.0
652 GO:0042180 0.773541 Cellular ketone metabolic process (1) 3.0
44 GO:0048812 0.771740 Neuron projection morphogenesis (1) 3.0
605 GO:0051901 0.770926 Positive regulation of mitochondrial depolarization (1) 0.0
155 GO:0030097 0.758070 Hemopoiesis (1) 4.0
306 GO:0043270 0.753238 Positive regulation of ion transport (1) 3.0
456 GO:0046578 0.747317 Regulation of ras protein signal transduction (1) 2.0
766 GO:0051145 0.741094 Smooth muscle cell differentiation (1) 2.0
379 GO:0032956 0.736770 Regulation of actin cytoskeleton organization (1) 3.0
898 GO:0035924 0.730231 Cellular response to vascular endothelial growth factor stimulus (1) 2.0
897 GO:0035767 0.729807 Endothelial cell chemotaxis (1) 1.0
817 GO:0010033 0.726109 Response to organic substance (1) 5.0
519 GO:0048839 0.725562 Inner ear development (1) 2.0
378 GO:0031532 0.722138 Actin cytoskeleton reorganization (1) 1.0
106 GO:0071900 0.721523 Regulation of protein serine/threonine kinase activity (1) 2.0
544 GO:0060179 0.712624 Male mating behavior (1) 0.0
763 GO:0060325 0.697674 Face morphogenesis (1) 0.0
277 GO:0016925 0.695177 Protein sumoylation (1) 1.0
328 GO:0043065 0.690749 Positive regulation of apoptotic process (1) 2.0
701 GO:0002053 0.689985 Positive regulation of mesenchymal cell proliferation (1) 0.0
77 GO:0002718 0.688320 Regulation of cytokine production involved in immune response (1) 2.0
691 GO:0043542 0.680928 Endothelial cell migration (1) 3.0
280 GO:0006810 0.677814 Transport (1) 7.0
239 GO:0006576 0.673930 Cellular biogenic amine metabolic process (1) 2.0
858 GO:0035019 0.669360 Somatic stem cell population maintenance (1) 1.0
347 GO:0006937 0.665317 Regulation of muscle contraction (1) 2.0
681 GO:2001257 0.650114 Regulation of cation channel activity (1) 2.0
711 GO:0019752 0.649156 Carboxylic acid metabolic process (1) 4.0
716 GO:0070662 0.644697 Mast cell proliferation (1) 0.0
509 GO:0060384 0.639217 Innervation (1) 1.0
900 GO:0042060 0.629958 Wound healing (1) 4.0
401 GO:0010811 0.627079 Positive regulation of cell-substrate adhesion (1) 1.0
388 GO:0051726 0.624874 Regulation of cell cycle (1) 5.0
144 GO:0031295 0.621763 T cell costimulation (1) 0.0
841 GO:0032355 0.617237 Response to estradiol (1) 1.0
126 GO:0002318 0.612203 Myeloid progenitor cell differentiation (1) 0.0
754 GO:0009888 0.609334 Tissue development (1) 4.0
430 GO:0019221 0.607804 Cytokine-mediated signaling pathway (1) 2.0
97 GO:0043129 0.607469 Surfactant homeostasis (1) 0.0
793 GO:0009725 0.604125 Response to hormone (1) 4.0
147 GO:0030218 0.603194 Erythrocyte differentiation (1) 1.0
675 GO:0071277 0.602376 Cellular response to calcium ion (1) 0.0
806 GO:0051056 0.599956 Regulation of small gtpase mediated signal transduction (1) 3.0
46 GO:0060348 0.599483 Bone development (1) 3.0
837 GO:0043536 0.595296 Positive regulation of blood vessel endothelial cell migration (1) 1.0
863 GO:0035855 0.595221 Megakaryocyte development (1) 0.0
803 GO:0042475 0.592086 Odontogenesis of dentin-containing tooth (1) 2.0
66 GO:0001824 0.588798 Blastocyst development (1) 1.0
495 GO:0060976 0.586013 Coronary vasculature development (1) 1.0
838 GO:0038033 0.585984 Positive regulation of endothelial cell chemotaxis by vegf-activated vascular endothelial growth factor receptor signaling pathway (1) 0.0
626 GO:0051649 0.585010 Establishment of localization in cell (1) 4.0
899 GO:0035994 0.581520 Response to muscle stretch (1) 1.0
398 GO:0045785 0.575910 Positive regulation of cell adhesion (1) 3.0
471 GO:0060045 0.574196 Positive regulation of cardiac muscle cell proliferation (1) 0.0
617 GO:0008354 0.569914 Germ cell migration (1) 0.0
331 GO:0071887 0.563795 Leukocyte apoptotic process (1) 2.0
508 GO:0042552 0.562630 Myelination (1) 2.0
47 GO:0048704 0.560470 Embryonic skeletal system morphogenesis (1) 2.0
386 GO:0007049 0.553817 Cell cycle (1) 6.0
346 GO:0006936 0.550409 Muscle contraction (1) 3.0
225 GO:0006470 0.541583 Protein dephosphorylation (1) 3.0
750 GO:0044281 0.539816 Small molecule metabolic process (1) 5.0
835 GO:0016239 0.538943 Positive regulation of macroautophagy (1) 1.0
219 GO:0006975 0.532966 Dna damage induced protein phosphorylation (1) 0.0
573 GO:0010629 0.530721 Negative regulation of gene expression (1) 3.0
354 GO:0050728 0.528698 Negative regulation of inflammatory response (1) 2.0
746 GO:0033327 0.520751 Leydig cell differentiation (1) 0.0
802 GO:0060766 0.520552 Negative regulation of androgen receptor signaling pathway (1) 0.0
249 GO:0019216 0.518909 Regulation of lipid metabolic process (1) 3.0
231 GO:0033619 0.516921 Membrane protein proteolysis (1) 1.0
428 GO:0007219 0.510598 Notch signaling pathway (1) 1.0
906 GO:0043549 0.506907 Regulation of kinase activity (1) 3.0
59 GO:0035788 0.505476 Cell migration involved in metanephros development (1) 0.0
708 GO:0050890 0.505448 Cognition (1) 2.0
468 GO:1903010 0.502791 Regulation of bone development (1) 0.0
912 GO:0043534 0.496543 Blood vessel endothelial cell migration (1) 2.0
148 GO:0050852 0.496154 T cell receptor signaling pathway (1) 1.0
639 GO:0060326 0.490465 Cell chemotaxis (1) 2.0
317 GO:0006897 0.488535 Endocytosis (1) 3.0
465 GO:0007275 0.485383 Multicellular organism development (1) 7.0
772 GO:0060485 0.484666 Mesenchyme development (1) 3.0
823 GO:0045471 0.483879 Response to ethanol (1) 1.0
373 GO:0030838 0.479914 Positive regulation of actin filament polymerization (1) 0.0
160 GO:0002573 0.478546 Myeloid leukocyte differentiation (1) 3.0
877 GO:0060437 0.473475 Lung growth (1) 0.0
245 GO:0006629 0.469312 Lipid metabolic process (1) 5.0
114 GO:0051894 0.466675 Positive regulation of focal adhesion assembly (1) 0.0
406 GO:0048041 0.466386 Focal adhesion assembly (1) 1.0
932 GO:0097021 0.465441 Lymphocyte migration into lymphoid organs (1) 0.0
339 GO:2000352 0.461676 Negative regulation of endothelial cell apoptotic process (1) 0.0
874 GO:1903053 0.452737 Regulation of extracellular matrix organization (1) 1.0
284 GO:0051050 0.451125 Positive regulation of transport (1) 4.0
13 GO:0046330 0.444601 Positive regulation of jnk cascade (1) 0.0
256 GO:0046474 0.443735 Glycerophospholipid biosynthetic process (1) 2.0
588 GO:0019222 0.443139 Regulation of metabolic process (1) 7.0
459 GO:0007267 0.428833 Cell-cell signaling (1) 5.0
919 GO:0046677 0.427086 Response to antibiotic (1) 1.0
462 GO:0007268 0.426575 Chemical synaptic transmission (1) 4.0
706 GO:0043547 0.426347 Positive regulation of gtpase activity (1) 1.0
660 GO:0045347 0.425159 Negative regulation of mhc class ii biosynthetic process (1) 0.0
290 GO:2000300 0.421019 Regulation of synaptic vesicle exocytosis (1) 1.0
791 GO:0009582 0.420489 Detection of abiotic stimulus (1) 2.0
907 GO:0051881 0.420291 Regulation of mitochondrial membrane potential (1) 1.0
517 GO:0043586 0.419816 Tongue development (1) 1.0
533 GO:0030168 0.419395 Platelet activation (1) 2.0
469 GO:0060173 0.411322 Limb development (1) 1.0
90 GO:0072284 0.410934 Metanephric s-shaped body morphogenesis (1) 0.0
780 GO:0046890 0.404557 Regulation of lipid biosynthetic process (1) 2.0
229 GO:0016485 0.400777 Protein processing (1) 3.0
446 GO:0048010 0.399575 Vascular endothelial growth factor receptor signaling pathway (1) 1.0
402 GO:0022407 0.398607 Regulation of cell-cell adhesion (1) 3.0
287 GO:0099111 0.397633 Microtubule-based transport (1) 3.0
194 GO:0006355 0.390046 Regulation of transcription, dna-templated (1) 4.0
913 GO:0090630 0.382248 Activation of gtpase activity (1) 0.0
122 GO:0002062 0.381242 Chondrocyte differentiation (1) 2.0
629 GO:0051174 0.380683 Regulation of phosphorus metabolic process (1) 6.0
668 GO:0010976 0.374728 Positive regulation of neuron projection development (1) 1.0
604 GO:0045088 0.374664 Regulation of innate immune response (1) 2.0
139 GO:0002684 0.374517 Positive regulation of immune system process (1) 4.0
250 GO:0044255 0.373685 Cellular lipid metabolic process (1) 4.0
163 GO:0002685 0.368480 Regulation of leukocyte migration (1) 2.0
369 GO:0055003 0.366810 Cardiac myofibril assembly (1) 0.0
265 GO:0051247 0.362984 Positive regulation of protein metabolic process (1) 4.0
795 GO:0043627 0.362212 Response to estrogen (1) 1.0
707 GO:0007202 0.359908 Activation of phospholipase c activity (1) 0.0
223 GO:0018108 0.359532 Peptidyl-tyrosine phosphorylation (1) 3.0
832 GO:0042220 0.359499 Response to cocaine (1) 1.0
715 GO:0061351 0.358465 Neural precursor cell proliferation (1) 3.0
873 GO:0002327 0.358081 Immature b cell differentiation (1) 0.0
16 GO:0046329 0.357586 Negative regulation of jnk cascade (1) 1.0
409 GO:0007166 0.352431 Cell surface receptor signaling pathway (1) 4.0
753 GO:1901135 0.352161 Carbohydrate derivative metabolic process (1) 4.0
69 GO:0001755 0.350614 Neural crest cell migration (1) 1.0
440 GO:0030512 0.350318 Negative regulation of transforming growth factor beta receptor signaling pathway (1) 0.0
563 GO:1900272 0.349758 Negative regulation of long-term synaptic potentiation (1) 0.0
650 GO:0034329 0.348915 Cell junction assembly (1) 2.0
230 GO:0030162 0.336130 Regulation of proteolysis (1) 3.0
175 GO:0005975 0.330773 Carbohydrate metabolic process (1) 4.0
627 GO:0035306 0.328011 Positive regulation of dephosphorylation (1) 1.0
400 GO:0007160 0.326200 Cell-matrix adhesion (1) 2.0
455 GO:0016601 0.324410 Rac protein signal transduction (1) 2.0
697 GO:0050678 0.322499 Regulation of epithelial cell proliferation (1) 3.0
71 GO:0060444 0.322093 Branching involved in mammary gland duct morphogenesis (1) 1.0
226 GO:0035304 0.320405 Regulation of protein dephosphorylation (1) 2.0
412 GO:0009966 0.306725 Regulation of signal transduction (1) 5.0
914 GO:0046486 0.298756 Glycerolipid metabolic process (1) 3.0
539 GO:0050795 0.293322 Regulation of behavior (1) 2.0
86 GO:0072006 0.288499 Nephron development (1) 2.0
337 GO:0043525 0.284469 Positive regulation of neuron apoptotic process (1) 0.0
625 GO:0051902 0.284041 Negative regulation of mitochondrial depolarization (1) 0.0
58 GO:0003338 0.282674 Metanephros morphogenesis (1) 1.0
811 GO:0043124 0.281702 Negative regulation of i-kappab kinase/nf-kappab signaling (1) 0.0
812 GO:0051897 0.280267 Positive regulation of protein kinase b signaling (1) 0.0
203 GO:0006163 0.279176 Purine nucleotide metabolic process (1) 3.0
543 GO:0007612 0.277571 Learning (1) 1.0
466 GO:0007389 0.275989 Pattern specification process (1) 3.0
883 GO:0032148 0.273937 Activation of protein kinase b activity (1) 0.0
483 GO:0035909 0.270466 Aorta morphogenesis (1) 1.0
755 GO:0016358 0.270034 Dendrite development (1) 3.0
307 GO:0034766 0.269283 Negative regulation of ion transmembrane transport (1) 1.0
513 GO:0030900 0.265273 Forebrain development (1) 3.0
479 GO:0048598 0.253519 Embryonic morphogenesis (1) 4.0
507 GO:0007416 0.252929 Synapse assembly (1) 1.0
860 GO:0051928 0.247690 Positive regulation of calcium ion transport (1) 2.0
651 GO:0050808 0.247245 Synapse organization (1) 3.0
890 GO:0071353 0.244414 Cellular response to interleukin-4 (1) 1.0
392 GO:0032467 0.243007 Positive regulation of cytokinesis (1) 0.0
695 GO:0015980 0.237863 Energy derivation by oxidation of organic compounds (1) 3.0
235 GO:0032436 0.237590 Positive regulation of proteasomal ubiquitin-dependent protein catabolic process (1) 1.0
560 GO:0043473 0.231280 Pigmentation (1) 2.0
463 GO:0035249 0.230532 Synaptic transmission, glutamatergic (1) 1.0
870 GO:0070527 0.229018 Platelet aggregation (1) 1.0
273 GO:0016575 0.227418 Histone deacetylation (1) 2.0
32 GO:0000422 0.224098 Autophagy of mitochondrion (1) 2.0
113 GO:0001952 0.223395 Regulation of cell-matrix adhesion (1) 1.0
9 GO:0043408 0.223296 Regulation of mapk cascade (1) 2.0
125 GO:0002244 0.223128 Hematopoietic progenitor cell differentiation (1) 2.0
132 GO:0002274 0.221454 Myeloid leukocyte activation (1) 2.0
869 GO:0030318 0.209787 Melanocyte differentiation (1) 1.0
0 GO:0000077 0.209594 Dna damage checkpoint signaling (1) 1.0
255 GO:0006644 0.209513 Phospholipid metabolic process (1) 3.0
645 GO:0071300 0.209465 Cellular response to retinoic acid (1) 0.0
729 GO:0120035 0.207325 Regulation of plasma membrane bounded cell projection organization (1) 3.0
796 GO:0097067 0.207250 Cellular response to thyroid hormone stimulus (1) 0.0
516 GO:0007423 0.206232 Sensory organ development (1) 3.0
410 GO:0007186 0.205939 G protein-coupled receptor signaling pathway (1) 3.0
722 GO:0042325 0.203618 Regulation of phosphorylation (1) 5.0
794 GO:0043434 0.202441 Response to peptide hormone (1) 3.0
88 GO:0090184 0.201141 Positive regulation of kidney development (1) 0.0
431 GO:0031663 0.200051 Lipopolysaccharide-mediated signaling pathway (1) 1.0
866 GO:0030101 0.198593 Natural killer cell activation (1) 2.0
38 GO:0000902 0.193896 Cell morphogenesis (1) 4.0
259 GO:0009259 0.192137 Ribonucleotide metabolic process (1) 3.0
703 GO:0048661 0.191886 Positive regulation of smooth muscle cell proliferation (1) 1.0
740 GO:0014911 0.191054 Positive regulation of smooth muscle cell migration (1) 1.0
714 GO:0050673 0.189629 Epithelial cell proliferation (1) 4.0
43 GO:0050770 0.188075 Regulation of axonogenesis (1) 2.0
381 GO:0008064 0.187642 Regulation of actin polymerization or depolymerization (1) 2.0
851 GO:0070933 0.185342 Histone h4 deacetylation (1) 0.0
762 GO:0060749 0.181899 Mammary gland alveolus development (1) 0.0
454 GO:0007266 0.181338 Rho protein signal transduction (1) 1.0
541 GO:0008542 0.180964 Visual learning (1) 0.0
499 GO:1990384 0.180877 Hyaloid vascular plexus regression (1) 0.0
320 GO:0006914 0.178279 Autophagy (1) 4.0
42 GO:0048675 0.176485 Axon extension (1) 2.0
234 GO:0010952 0.174912 Positive regulation of peptidase activity (1) 2.0
192 GO:0006352 0.168084 Dna-templated transcription, initiation (1) 3.0
448 GO:0008277 0.168080 Regulation of g protein-coupled receptor signaling pathway (1) 2.0
582 GO:1902459 0.162029 Positive regulation of stem cell population maintenance (1) 0.0
489 GO:0021766 0.161871 Hippocampus development (1) 1.0
154 GO:0050778 0.157385 Positive regulation of immune response (1) 3.0
187 GO:0071897 0.156799 Dna biosynthetic process (1) 2.0
7 GO:0000122 0.155105 Negative regulation of transcription by rna polymerase ii (1) 1.0
821 GO:0034097 0.153698 Response to cytokine (1) 3.0
124 GO:0002218 0.151919 Activation of innate immune response (1) 1.0
699 GO:0033689 0.151375 Negative regulation of osteoblast proliferation (1) 0.0
288 GO:0032940 0.151138 Secretion by cell (1) 5.0
854 GO:0070102 0.145028 Interleukin-6-mediated signaling pathway (1) 0.0
361 GO:0010638 0.143294 Positive regulation of organelle organization (1) 2.0
612 GO:0019827 0.141470 Stem cell population maintenance (1) 2.0
781 GO:0008625 0.139268 Extrinsic apoptotic signaling pathway via death domain receptors (1) 1.0
101 GO:0033141 0.136334 Positive regulation of peptidyl-serine phosphorylation of stat protein (1) 0.0
611 GO:1902904 0.136088 Negative regulation of supramolecular fiber organization (1) 2.0
643 GO:0071222 0.135913 Cellular response to lipopolysaccharide (1) 2.0
394 GO:0007059 0.133987 Chromosome segregation (1) 3.0
824 GO:0071363 0.126406 Cellular response to growth factor stimulus (1) 3.0
53 GO:0001570 0.126375 Vasculogenesis (1) 1.0
738 GO:0098586 0.123247 Cellular response to virus (1) 1.0
472 GO:0001553 0.119373 Luteinization (1) 0.0
109 GO:1905564 0.116462 Positive regulation of vascular endothelial cell proliferation (1) 0.0
709 GO:0019233 0.115463 Sensory perception of pain (1) 1.0
728 GO:0030032 0.113460 Lamellipodium assembly (1) 1.0
257 GO:0046488 0.111935 Phosphatidylinositol metabolic process (1) 2.0
313 GO:0051281 0.111192 Positive regulation of release of sequestered calcium ion into cytosol (1) 1.0
924 GO:0035754 0.107144 B cell chemotaxis (1) 0.0
658 GO:0048468 0.103878 Cell development (1) 5.0
545 GO:0008016 0.102922 Regulation of heart contraction (1) 3.0
532 GO:0007596 0.102694 Blood coagulation (1) 3.0
103 GO:0042531 0.102081 Positive regulation of tyrosine phosphorylation of stat protein (1) 0.0
938 GO:0007158 0.100508 Neuron cell-cell adhesion (1) 0.0
383 GO:0051497 0.100321 Negative regulation of stress fiber assembly (1) 0.0
57 GO:0001656 0.100188 Metanephros development (1) 2.0
83 GO:0002720 0.098565 Positive regulation of cytokine production involved in immune response (1) 1.0
382 GO:0030041 0.097715 Actin filament polymerization (1) 2.0
89 GO:0072239 0.093313 Metanephric glomerulus vasculature development (1) 0.0
180 GO:0019318 0.092513 Hexose metabolic process (1) 2.0
555 GO:1903078 0.092013 Positive regulation of protein localization to plasma membrane (1) 1.0
434 GO:0048011 0.091817 Neurotrophin trk receptor signaling pathway (1) 1.0
119 GO:0060740 0.091714 Prostate gland epithelium morphogenesis (1) 1.0
95 GO:0001894 0.091179 Tissue homeostasis (1) 2.0
661 GO:0046326 0.090707 Positive regulation of glucose import (1) 0.0
435 GO:0016055 0.090703 Wnt signaling pathway (1) 2.0
613 GO:0032922 0.089808 Circadian regulation of gene expression (1) 0.0
450 GO:0051209 0.087976 Release of sequestered calcium ion into cytosol (1) 3.0
477 GO:0040016 0.085668 Embryonic cleavage (1) 0.0
607 GO:0042391 0.085377 Regulation of membrane potential (1) 3.0
669 GO:0010592 0.084513 Positive regulation of lamellipodium assembly (1) 0.0
170 GO:0003007 0.084095 Heart morphogenesis (1) 3.0
269 GO:0031397 0.081637 Negative regulation of protein ubiquitination (1) 1.0
206 GO:0006298 0.080826 Mismatch repair (1) 0.0
31 GO:0070301 0.080430 Cellular response to hydrogen peroxide (1) 1.0
130 GO:0002819 0.080102 Regulation of adaptive immune response (1) 3.0
29 GO:1901031 0.077813 Regulation of response to reactive oxygen species (1) 1.0
242 GO:0090314 0.077607 Positive regulation of protein targeting to membrane (1) 0.0
417 GO:0060395 0.076956 Smad protein signal transduction (1) 1.0
710 GO:0055119 0.076428 Relaxation of cardiac muscle (1) 1.0
567 GO:0051641 0.073979 Cellular localization (1) 5.0
491 GO:0060440 0.069181 Trachea formation (1) 0.0
608 GO:0043114 0.069090 Regulation of vascular permeability (1) 1.0
190 GO:0045739 0.068981 Positive regulation of dna repair (1) 1.0
283 GO:0051049 0.068824 Regulation of transport (1) 5.0
595 GO:0051302 0.067812 Regulation of cell division (1) 1.0
91 GO:0001837 0.067510 Epithelial to mesenchymal transition (1) 2.0
110 GO:0001942 0.066958 Hair follicle development (1) 1.0
275 GO:0090042 0.066526 Tubulin deacetylation (1) 1.0
41 GO:0007411 0.065935 Axon guidance (1) 2.0
679 GO:0042310 0.063952 Vasoconstriction (1) 1.0
232 GO:0010951 0.062732 Negative regulation of endopeptidase activity (1) 2.0
865 GO:0030217 0.062144 T cell differentiation (1) 3.0
171 GO:0003014 0.057983 Renal system process (1) 2.0
501 GO:0007507 0.057228 Heart development (1) 4.0
70 GO:0001763 0.054372 Morphogenesis of a branching structure (1) 2.0
351 GO:0060048 0.052464 Cardiac muscle contraction (1) 2.0
121 GO:0090050 0.049572 Positive regulation of cell migration involved in sprouting angiogenesis (1) 0.0
169 GO:0002764 0.049060 Immune response-regulating signaling pathway (1) 3.0
630 GO:2000377 0.047289 Regulation of reactive oxygen species metabolic process (1) 2.0
537 GO:0007626 0.045891 Locomotory behavior (1) 2.0
799 GO:0009743 0.045252 Response to carbohydrate (1) 2.0
301 GO:0032388 0.044514 Positive regulation of intracellular transport (1) 1.0
40 GO:0034446 0.043394 Substrate adhesion-dependent cell spreading (1) 1.0
152 GO:0045087 0.042899 Innate immune response (1) 3.0
73 GO:0001779 0.041596 Natural killer cell differentiation (1) 1.0
212 GO:0051090 0.040406 Regulation of dna-binding transcription factor activity (1) 2.0
864 GO:0030183 0.038386 B cell differentiation (1) 1.0
705 GO:0051353 0.036453 Positive regulation of oxidoreductase activity (1) 1.0
587 GO:0050921 0.036116 Positive regulation of chemotaxis (1) 1.0
760 GO:0048286 0.035821 Lung alveolus development (1) 1.0
254 GO:0006687 0.035424 Glycosphingolipid metabolic process (1) 2.0
228 GO:0006508 0.033855 Proteolysis (1) 4.0
757 GO:0021987 0.030030 Cerebral cortex development (1) 2.0
673 GO:0043392 0.029313 Negative regulation of dna binding (1) 1.0
530 GO:0007584 0.028301 Response to nutrient (1) 3.0
712 GO:0033002 0.028034 Muscle cell proliferation (1) 2.0
792 GO:0060560 0.027744 Developmental growth involved in morphogenesis (1) 3.0
797 GO:0071383 0.026769 Cellular response to steroid hormone stimulus (1) 2.0
920 GO:0036092 0.025989 Phosphatidylinositol-3-phosphate biosynthetic process (1) 0.0
396 GO:0007162 0.024820 Negative regulation of cell adhesion (1) 2.0
498 GO:0061029 0.024413 Eyelid development in camera-type eye (1) 0.0
480 GO:0048565 0.023576 Digestive tract development (1) 1.0
51 GO:0045766 0.022903 Positive regulation of angiogenesis (1) 1.0
574 GO:0008285 0.020185 Negative regulation of cell population proliferation (1) 3.0
521 GO:0035051 0.019783 Cardiocyte differentiation (1) 2.0
205 GO:0045740 0.019424 Positive regulation of dna replication (1) 1.0
487 GO:0030325 0.018166 Adrenal gland development (1) 0.0
601 GO:0034103 0.018123 Regulation of tissue remodeling (1) 1.0
261 GO:0051171 0.014388 Regulation of nitrogen compound metabolic process (1) 6.0
570 GO:0045597 0.014362 Positive regulation of cell differentiation (1) 3.0
842 GO:0071407 0.014361 Cellular response to organic cyclic compound (1) 3.0
936 GO:0051000 0.014216 Positive regulation of nitric-oxide synthase activity (1) 0.0
461 GO:0050804 0.011722 Modulation of chemical synaptic transmission (1) 3.0
54 GO:2001214 0.010912 Positive regulation of vasculogenesis (1) 0.0
236 GO:0043161 0.009887 Proteasome-mediated ubiquitin-dependent protein catabolic process (1) 2.0
734 GO:0051702 0.006784 Biological process involved in interaction with symbiont (1) 2.0
717 GO:0046651 0.006589 Lymphocyte proliferation (1) 2.0
323 GO:0016236 0.006518 Macroautophagy (1) 3.0
158 GO:0002862 0.006214 Negative regulation of inflammatory response to antigenic stimulus (1) 1.0
619 GO:0050790 0.006102 Regulation of catalytic activity (1) 4.0
520 GO:0007498 0.005849 Mesoderm development (1) 3.0
136 GO:0002376 0.005563 Immune system process (1) 6.0
631 GO:0050920 0.005385 Regulation of chemotaxis (1) 2.0
403 GO:0033628 0.003111 Regulation of cell adhesion mediated by integrin (1) 1.0
345 GO:0006919 0.001149 Activation of cysteine-type endopeptidase activity involved in apoptotic process (1) 1.0
730 GO:0031175 0.000000 Neuron projection development (1) 4.0
304 GO:0046942 -0.001338 Carboxylic acid transport (1) 3.0
474 GO:0030539 -0.001885 Male genitalia development (1) 0.0
561 GO:0048511 -0.003138 Rhythmic process (1) 3.0
736 GO:0009617 -0.003666 Response to bacterium (1) 3.0
529 GO:0007568 -0.004377 Aging (1) 1.0
747 GO:0048878 -0.006747 Chemical homeostasis (1) 6.0
291 GO:0030072 -0.007065 Peptide hormone secretion (1) 3.0
580 GO:0045907 -0.009718 Positive regulation of vasoconstriction (1) 0.0
467 GO:0009791 -0.011347 Post-embryonic development (1) 1.0
935 GO:0048103 -0.011384 Somatic stem cell division (1) 0.0
657 GO:0045165 -0.012040 Cell fate commitment (1) 3.0
414 GO:0023019 -0.016733 Signal transduction involved in regulation of gene expression (1) 0.0
429 GO:0007229 -0.017112 Integrin-mediated signaling pathway (1) 1.0
312 GO:1904062 -0.018714 Regulation of cation transmembrane transport (1) 3.0
296 GO:0015031 -0.019362 Protein transport (1) 4.0
191 GO:0045944 -0.020718 Positive regulation of transcription by rna polymerase ii (1) 2.0
602 GO:0046620 -0.020955 Regulation of organ growth (1) 1.0
247 GO:0008610 -0.024022 Lipid biosynthetic process (1) 3.0
576 GO:0050866 -0.024773 Negative regulation of cell activation (1) 2.0
868 GO:0048863 -0.027968 Stem cell differentiation (1) 2.0
310 GO:0051924 -0.028544 Regulation of calcium ion transport (1) 3.0
332 GO:0097190 -0.028734 Apoptotic signaling pathway (1) 4.0
482 GO:0048557 -0.029065 Embryonic digestive tract morphogenesis (1) 0.0
262 GO:0045429 -0.029754 Positive regulation of nitric oxide biosynthetic process (1) 0.0
622 GO:0043086 -0.032086 Negative regulation of catalytic activity (1) 3.0
623 GO:0031334 -0.034770 Positive regulation of protein-containing complex assembly (1) 2.0
131 GO:0002821 -0.037160 Positive regulation of adaptive immune response (1) 2.0
618 GO:0032409 -0.037577 Regulation of transporter activity (1) 3.0
63 GO:0001666 -0.040150 Response to hypoxia (1) 2.0
196 GO:0006357 -0.041500 Regulation of transcription by rna polymerase ii (1) 3.0
270 GO:0031056 -0.041705 Regulation of histone modification (1) 2.0
733 GO:0030335 -0.043802 Positive regulation of cell migration (1) 3.0
93 GO:0001843 -0.044624 Neural tube closure (1) 1.0
370 GO:0033044 -0.044682 Regulation of chromosome organization (1) 2.0
408 GO:0007165 -0.044905 Signal transduction (1) 6.0
153 GO:0050776 -0.045236 Regulation of immune response (1) 4.0
227 GO:0032516 -0.046939 Positive regulation of phosphoprotein phosphatase activity (1) 0.0
217 GO:0006457 -0.046986 Protein folding (1) 1.0
415 GO:0030522 -0.048782 Intracellular receptor signaling pathway (1) 2.0
366 GO:0090201 -0.049666 Negative regulation of release of cytochrome c from mitochondria (1) 0.0
436 GO:0060079 -0.050457 Excitatory postsynaptic potential (1) 1.0
380 GO:0051017 -0.052351 Actin filament bundle assembly (1) 1.0
903 GO:0050863 -0.053759 Regulation of t cell activation (1) 3.0
78 GO:0032760 -0.054179 Positive regulation of tumor necrosis factor production (1) 0.0
188 GO:0009165 -0.055647 Nucleotide biosynthetic process (1) 3.0
904 GO:0042113 -0.058054 B cell activation (1) 3.0
138 GO:0002683 -0.058846 Negative regulation of immune system process (1) 3.0
826 GO:0071364 -0.059153 Cellular response to epidermal growth factor stimulus (1) 0.0
915 GO:0050872 -0.059361 White fat cell differentiation (1) 0.0
102 GO:0050731 -0.063114 Positive regulation of peptidyl-tyrosine phosphorylation (1) 2.0
133 GO:0043303 -0.063260 Mast cell degranulation (1) 1.0
494 GO:0060840 -0.069316 Artery development (1) 2.0
504 GO:0048709 -0.071150 Oligodendrocyte differentiation (1) 2.0
178 GO:0005984 -0.071330 Disaccharide metabolic process (1) 0.0
737 GO:0051607 -0.071636 Defense response to virus (1) 2.0
732 GO:0060997 -0.074427 Dendritic spine morphogenesis (1) 1.0
460 GO:0023061 -0.075653 Signal release (1) 4.0
67 GO:0001892 -0.076334 Embryonic placenta development (1) 1.0
724 GO:0036473 -0.088337 Cell death in response to oxidative stress (1) 2.0
447 GO:0007173 -0.090517 Epidermal growth factor receptor signaling pathway (1) 2.0
353 GO:0006954 -0.092872 Inflammatory response (1) 3.0
861 GO:0090280 -0.094645 Positive regulation of calcium ion import (1) 0.0
731 GO:0031529 -0.095254 Ruffle organization (1) 1.0
779 GO:0008544 -0.096671 Epidermis development (1) 3.0
901 GO:0042110 -0.098950 T cell activation (1) 4.0
690 GO:2001020 -0.101079 Regulation of response to dna damage stimulus (1) 2.0
360 GO:0007030 -0.102202 Golgi organization (1) 1.0
295 GO:0051047 -0.107145 Positive regulation of secretion (1) 3.0
786 GO:0034605 -0.107349 Cellular response to heat (1) 1.0
749 GO:0009058 -0.108343 Biosynthetic process (1) 5.0
688 GO:1900407 -0.109537 Regulation of cellular response to oxidative stress (1) 2.0
735 GO:0046718 -0.110283 Viral entry into host cell (1) 1.0
76 GO:0001819 -0.110409 Positive regulation of cytokine production (1) 2.0
50 GO:0002040 -0.110656 Sprouting angiogenesis (1) 1.0
692 GO:0010595 -0.112175 Positive regulation of endothelial cell migration (1) 2.0
419 GO:0003376 -0.112243 Sphingosine-1-phosphate receptor signaling pathway (1) 0.0
374 GO:2000251 -0.113035 Positive regulation of actin cytoskeleton reorganization (1) 0.0
845 GO:0016071 -0.114794 Mrna metabolic process (1) 4.0
589 GO:0042752 -0.116589 Regulation of circadian rhythm (1) 2.0
615 GO:0035265 -0.117147 Organ growth (1) 2.0
918 GO:0046854 -0.117793 Phosphatidylinositol phosphate biosynthetic process (1) 1.0
485 GO:0007422 -0.118904 Peripheral nervous system development (1) 2.0
81 GO:0032729 -0.121441 Positive regulation of interferon-gamma production (1) 0.0
591 GO:0010941 -0.121750 Regulation of cell death (1) 5.0
759 GO:0032835 -0.121813 Glomerulus development (1) 1.0
511 GO:0030182 -0.123468 Neuron differentiation (1) 5.0
260 GO:0006807 -0.123894 Nitrogen compound metabolic process (1) 7.0
859 GO:0021782 -0.127920 Glial cell development (1) 2.0
108 GO:0001938 -0.130000 Positive regulation of endothelial cell proliferation (1) 1.0
335 GO:2000270 -0.130342 Negative regulation of fibroblast apoptotic process (1) 0.0
162 GO:0060374 -0.130465 Mast cell differentiation (1) 0.0
87 GO:0072073 -0.132456 Kidney epithelium development (1) 2.0
183 GO:0006401 -0.132593 Rna catabolic process (1) 3.0
818 GO:0010243 -0.134405 Response to organonitrogen compound (1) 4.0
641 GO:0071417 -0.135033 Cellular response to organonitrogen compound (1) 3.0
464 GO:0060291 -0.136241 Long-term synaptic potentiation (1) 1.0
929 GO:0051341 -0.137416 Regulation of oxidoreductase activity (1) 2.0
581 GO:0120162 -0.138251 Positive regulation of cold-induced thermogenesis (1) 0.0
389 GO:0010564 -0.140360 Regulation of cell cycle process (1) 4.0
443 GO:0035860 -0.140553 Glial cell-derived neurotrophic factor receptor signaling pathway (1) 0.0
20 GO:0070507 -0.141310 Regulation of microtubule cytoskeleton organization (1) 2.0
453 GO:0007265 -0.142317 Ras protein signal transduction (1) 3.0
751 GO:0046034 -0.143417 Atp metabolic process (1) 2.0
3 GO:1900087 -0.145967 Positive regulation of g1/s transition of mitotic cell cycle (1) 0.0
214 GO:0006412 -0.146506 Translation (1) 4.0
566 GO:0032879 -0.149430 Regulation of localization (1) 6.0
671 GO:0032092 -0.152370 Positive regulation of protein binding (1) 1.0
761 GO:0060021 -0.155229 Roof of mouth development (1) 1.0
420 GO:2001241 -0.155496 Positive regulation of extrinsic apoptotic signaling pathway in absence of ligand (1) 0.0
405 GO:0007159 -0.155632 Leukocyte cell-cell adhesion (1) 3.0
358 GO:0007005 -0.155655 Mitochondrion organization (1) 3.0
210 GO:1902275 -0.156027 Regulation of chromatin organization (1) 1.0
311 GO:0030001 -0.158264 Metal ion transport (1) 5.0
810 GO:0043123 -0.158702 Positive regulation of i-kappab kinase/nf-kappab signaling (1) 0.0
168 GO:0045580 -0.161790 Regulation of t cell differentiation (1) 2.0
72 GO:0001764 -0.163226 Neuron migration (1) 1.0
776 GO:0060644 -0.163884 Mammary gland epithelial cell differentiation (1) 0.0
319 GO:0006909 -0.166043 Phagocytosis (1) 2.0
268 GO:0016570 -0.169677 Histone modification (1) 4.0
788 GO:0009410 -0.171927 Response to xenobiotic stimulus (1) 2.0
128 GO:1902036 -0.173697 Regulation of hematopoietic stem cell differentiation (1) 0.0
609 GO:0048167 -0.175411 Regulation of synaptic plasticity (1) 2.0
377 GO:0031032 -0.181549 Actomyosin structure organization (1) 2.0
438 GO:0007179 -0.182780 Transforming growth factor beta receptor signaling pathway (1) 1.0
28 GO:0000302 -0.183602 Response to reactive oxygen species (1) 2.0
745 GO:0009653 -0.188122 Anatomical structure morphogenesis (1) 5.0
395 GO:0007155 -0.188355 Cell adhesion (1) 5.0
535 GO:0050910 -0.188639 Detection of mechanical stimulus involved in sensory perception of sound (1) 0.0
14 GO:0043407 -0.189998 Negative regulation of map kinase activity (1) 1.0
636 GO:0030048 -0.190360 Actin filament-based movement (1) 2.0
909 GO:0042593 -0.191255 Glucose homeostasis (1) 2.0
546 GO:0008104 -0.192695 Protein localization (1) 5.0
289 GO:0007269 -0.196526 Neurotransmitter secretion (1) 2.0
885 GO:1900180 -0.200364 Regulation of protein localization to nucleus (1) 1.0
30 GO:1901300 -0.200519 Positive regulation of hydrogen peroxide-mediated programmed cell death (1) 0.0
739 GO:1902903 -0.203430 Regulation of supramolecular fiber organization (1) 3.0
847 GO:0045055 -0.204997 Regulated exocytosis (1) 2.0
299 GO:0030705 -0.206607 Cytoskeleton-dependent intracellular transport (1) 3.0
340 GO:2001234 -0.207987 Negative regulation of apoptotic signaling pathway (1) 3.0
849 GO:0043966 -0.208541 Histone h3 acetylation (1) 2.0
286 GO:0055085 -0.210703 Transmembrane transport (1) 6.0
363 GO:0051494 -0.211867 Negative regulation of cytoskeleton organization (1) 2.0
48 GO:0001525 -0.211963 Angiogenesis (1) 2.0
813 GO:0051898 -0.215146 Negative regulation of protein kinase b signaling (1) 0.0
700 GO:0033690 -0.223856 Positive regulation of osteoblast proliferation (1) 0.0
209 GO:0006338 -0.224254 Chromatin remodeling (1) 2.0
416 GO:0035556 -0.225162 Intracellular signal transduction (1) 4.0
916 GO:2000739 -0.225462 Regulation of mesenchymal stem cell differentiation (1) 0.0
79 GO:0032740 -0.225949 Positive regulation of interleukin-17 production (1) 0.0
572 GO:0034767 -0.226631 Positive regulation of ion transmembrane transport (1) 2.0
272 GO:0031398 -0.229217 Positive regulation of protein ubiquitination (1) 2.0
222 GO:0018107 -0.231930 Peptidyl-threonine phosphorylation (1) 1.0
879 GO:0031103 -0.232786 Axon regeneration (1) 1.0
413 GO:0009968 -0.233227 Negative regulation of signal transduction (1) 4.0
765 GO:0021795 -0.237208 Cerebral cortex cell migration (1) 1.0
325 GO:0006915 -0.240649 Apoptotic process (1) 5.0
172 GO:0003158 -0.242769 Endothelium development (1) 2.0
357 GO:0006997 -0.247765 Nucleus organization (1) 2.0
725 GO:0070997 -0.248013 Neuron death (1) 2.0
355 GO:0050729 -0.250210 Positive regulation of inflammatory response (1) 1.0
233 GO:0016579 -0.250907 Protein deubiquitination (1) 1.0
666 GO:0010727 -0.258854 Negative regulation of hydrogen peroxide metabolic process (1) 0.0
202 GO:0006753 -0.259533 Nucleoside phosphate metabolic process (1) 4.0
685 GO:0051973 -0.259761 Positive regulation of telomerase activity (1) 0.0
549 GO:0009306 -0.261559 Protein secretion (1) 3.0
778 GO:0045793 -0.270168 Positive regulation of cell size (1) 0.0
303 GO:1904659 -0.270188 Glucose transmembrane transport (1) 1.0
676 GO:0030282 -0.272104 Bone mineralization (1) 1.0
294 GO:0051046 -0.276779 Regulation of secretion (1) 4.0
362 GO:0033043 -0.280077 Regulation of organelle organization (1) 4.0
123 GO:0002064 -0.285938 Epithelial cell development (1) 2.0
282 GO:0016192 -0.287866 Vesicle-mediated transport (1) 4.0
678 GO:0071333 -0.289435 Cellular response to glucose stimulus (1) 1.0
523 GO:0007517 -0.293992 Muscle organ development (1) 3.0
150 GO:0002437 -0.297868 Inflammatory response to antigenic stimulus (1) 2.0
704 GO:1904707 -0.299166 Positive regulation of vascular associated smooth muscle cell proliferation (1) 0.0
771 GO:0031016 -0.299979 Pancreas development (1) 2.0
937 GO:0051640 -0.307828 Organelle localization (1) 3.0
107 GO:1905563 -0.308080 Negative regulation of vascular endothelial cell proliferation (1) 0.0
404 GO:0007156 -0.311359 Homophilic cell adhesion via plasma membrane adhesion molecules (1) 1.0
238 GO:0043162 -0.311445 Ubiquitin-dependent protein catabolic process via the multivesicular body sorting pathway (1) 1.0
752 GO:0043170 -0.315334 Macromolecule metabolic process (1) 7.0
552 GO:0033365 -0.319737 Protein localization to organelle (1) 3.0
881 GO:0031929 -0.320242 Tor signaling (1) 2.0
484 GO:0007399 -0.320323 Nervous system development (1) 6.0
129 GO:0002250 -0.320676 Adaptive immune response (1) 4.0
741 GO:0016477 -0.323694 Cell migration (1) 4.0
600 GO:0010632 -0.324795 Regulation of epithelial cell migration (1) 3.0
197 GO:0006367 -0.325397 Transcription initiation from rna polymerase ii promoter (1) 2.0
118 GO:0060571 -0.325595 Morphogenesis of an epithelial fold (1) 1.0
505 GO:0007283 -0.329296 Spermatogenesis (1) 2.0
542 GO:0048149 -0.329477 Behavioral response to ethanol (1) 0.0
391 GO:0090068 -0.333050 Positive regulation of cell cycle process (1) 2.0
656 GO:0030154 -0.339581 Cell differentiation (1) 6.0
698 GO:0070663 -0.341786 Regulation of leukocyte proliferation (1) 2.0
164 GO:0051249 -0.342420 Regulation of lymphocyte activation (1) 4.0
720 GO:0051261 -0.344762 Protein depolymerization (1) 2.0
475 GO:0050769 -0.344990 Positive regulation of neurogenesis (1) 2.0
165 GO:0050870 -0.345649 Positive regulation of t cell activation (1) 2.0
432 GO:0038007 -0.346961 Netrin-activated signaling pathway (1) 0.0
246 GO:0008202 -0.348429 Steroid metabolic process (1) 3.0
831 GO:0001975 -0.357673 Response to amphetamine (1) 0.0
177 GO:0045821 -0.361584 Positive regulation of glycolytic process (1) 0.0
127 GO:0002320 -0.363702 Lymphoid progenitor cell differentiation (1) 1.0
399 GO:0098609 -0.364601 Cell-cell adhesion (1) 4.0
599 GO:0017157 -0.372042 Regulation of exocytosis (1) 2.0
790 GO:0034644 -0.372186 Cellular response to uv (1) 1.0
568 GO:0008284 -0.372632 Positive regulation of cell population proliferation (1) 2.0
75 GO:0001818 -0.372931 Negative regulation of cytokine production (1) 1.0
610 GO:0031333 -0.373027 Negative regulation of protein-containing complex assembly (1) 2.0
770 GO:0009887 -0.378818 Animal organ morphogenesis (1) 4.0
18 GO:0000226 -0.382412 Microtubule cytoskeleton organization (1) 3.0
244 GO:0042307 -0.383034 Positive regulation of protein import into nucleus (1) 0.0
510 GO:0042063 -0.386958 Gliogenesis (1) 3.0
252 GO:0046889 -0.389004 Positive regulation of lipid biosynthetic process (1) 1.0
683 GO:0048477 -0.389075 Oogenesis (1) 1.0
686 GO:0097009 -0.390741 Energy homeostasis (1) 0.0
597 GO:0060627 -0.391978 Regulation of vesicle-mediated transport (1) 3.0
594 GO:0051128 -0.392248 Regulation of cellular component organization (1) 5.0
36 GO:0000723 -0.397902 Telomere maintenance (1) 1.0
579 GO:0040018 -0.399025 Positive regulation of multicellular organism growth (1) 0.0
55 GO:0001649 -0.403752 Osteoblast differentiation (1) 1.0
642 GO:0034599 -0.410954 Cellular response to oxidative stress (1) 3.0
318 GO:0006898 -0.412020 Receptor-mediated endocytosis (1) 2.0
518 GO:0042472 -0.414984 Inner ear morphogenesis (1) 1.0
816 GO:0032008 -0.416975 Positive regulation of tor signaling (1) 1.0
808 GO:1902532 -0.418085 Negative regulation of intracellular signal transduction (1) 3.0
135 GO:0042093 -0.420233 T-helper cell differentiation (1) 1.0
364 GO:0140013 -0.421435 Meiotic nuclear division (1) 2.0
536 GO:0007610 -0.428170 Behavior (1) 3.0
267 GO:0045732 -0.434807 Positive regulation of protein catabolic process (1) 2.0
789 GO:0009416 -0.439464 Response to light stimulus (1) 2.0
637 GO:0030198 -0.439683 Extracellular matrix organization (1) 2.0
82 GO:0032743 -0.458060 Positive regulation of interleukin-2 production (1) 0.0
694 GO:0032869 -0.463421 Cellular response to insulin stimulus (1) 2.0
646 GO:0071320 -0.466282 Cellular response to camp (1) 0.0
146 GO:0043029 -0.470875 T cell homeostasis (1) 1.0
911 GO:0060416 -0.475618 Response to growth hormone (1) 1.0
237 GO:0006511 -0.479298 Ubiquitin-dependent protein catabolic process (1) 3.0
819 GO:0014070 -0.480176 Response to organic cyclic compound (1) 4.0
896 GO:0097193 -0.483526 Intrinsic apoptotic signaling pathway (1) 3.0
134 GO:0002366 -0.488727 Leukocyte activation involved in immune response (1) 3.0
493 GO:0001946 -0.489050 Lymphangiogenesis (1) 0.0
689 GO:1905897 -0.495409 Regulation of response to endoplasmic reticulum stress (1) 2.0
478 GO:0048568 -0.497049 Embryonic organ development (1) 3.0
820 GO:0033993 -0.497561 Response to lipid (1) 3.0
934 GO:0051258 -0.500444 Protein polymerization (1) 3.0
800 GO:0030521 -0.503978 Androgen receptor signaling pathway (1) 1.0
276 GO:0016567 -0.510030 Protein ubiquitination (1) 3.0
663 GO:0050821 -0.512838 Protein stabilization (1) 0.0
768 GO:0051147 -0.515025 Regulation of muscle cell differentiation (1) 2.0
384 GO:0007018 -0.516240 Microtubule-based movement (1) 4.0
200 GO:0006281 -0.520134 Dna repair (1) 2.0
496 GO:0048608 -0.521065 Reproductive structure development (1) 2.0
921 GO:0035790 -0.529398 Platelet-derived growth factor receptor-alpha signaling pathway (1) 0.0
207 GO:0006303 -0.530286 Double-strand break repair via nonhomologous end joining (1) 1.0
548 GO:1903829 -0.534357 Positive regulation of protein localization (1) 3.0
889 GO:0098780 -0.534495 Response to mitochondrial depolarisation (1) 1.0
281 GO:0006811 -0.536119 Ion transport (1) 6.0
336 GO:0043524 -0.548174 Negative regulation of neuron apoptotic process (1) 1.0
184 GO:0006275 -0.549978 Regulation of dna replication (1) 2.0
96 GO:0048873 -0.558233 Homeostasis of number of cells within a tissue (1) 0.0
449 GO:0007204 -0.558363 Positive regulation of cytosolic calcium ion concentration (1) 4.0
744 GO:0055082 -0.561669 Cellular chemical homeostasis (1) 5.0
329 GO:0043066 -0.563158 Negative regulation of apoptotic process (1) 4.0
37 GO:0000724 -0.565085 Double-strand break repair via homologous recombination (1) 1.0
871 GO:0042551 -0.565380 Neuron maturation (1) 1.0
525 GO:0048741 -0.566143 Skeletal muscle fiber development (1) 1.0
557 GO:0072655 -0.572037 Establishment of protein localization to mitochondrion (1) 1.0
215 GO:0006417 -0.572163 Regulation of translation (1) 3.0
584 GO:0040008 -0.572873 Regulation of growth (1) 3.0
748 GO:0009056 -0.572913 Catabolic process (1) 5.0
556 GO:0051223 -0.574651 Regulation of protein transport (1) 3.0
433 GO:0097191 -0.576815 Extrinsic apoptotic signaling pathway (1) 3.0
502 GO:0007420 -0.577438 Brain development (1) 4.0
371 GO:0010821 -0.581599 Regulation of mitochondrion organization (1) 2.0
326 GO:0008637 -0.587399 Apoptotic mitochondrial changes (1) 1.0
767 GO:0051146 -0.588274 Striated muscle cell differentiation (1) 2.0
314 GO:0070588 -0.591082 Calcium ion transmembrane transport (1) 4.0
248 GO:0016042 -0.605006 Lipid catabolic process (1) 2.0
411 GO:0009755 -0.607054 Hormone-mediated signaling pathway (1) 2.0
182 GO:0016070 -0.607882 Rna metabolic process (1) 5.0
888 GO:0034976 -0.613362 Response to endoplasmic reticulum stress (1) 3.0
176 GO:0044262 -0.620829 Cellular carbohydrate metabolic process (1) 3.0
846 GO:0099504 -0.627336 Synaptic vesicle cycle (1) 2.0
528 GO:0007565 -0.627705 Female pregnancy (1) 2.0
211 GO:0031507 -0.627760 Heterochromatin assembly (1) 1.0
94 GO:0001889 -0.633760 Liver development (1) 1.0
52 GO:0001541 -0.634171 Ovarian follicle development (1) 1.0
137 GO:0002682 -0.635094 Regulation of immune system process (1) 5.0
620 GO:0051098 -0.635782 Regulation of binding (1) 3.0
266 GO:0030163 -0.652510 Protein catabolic process (1) 4.0
638 GO:0033554 -0.652740 Cellular response to stress (1) 4.0
777 GO:0050680 -0.661189 Negative regulation of epithelial cell proliferation (1) 2.0
425 GO:0060391 -0.664363 Positive regulation of smad protein signal transduction (1) 0.0
375 GO:0051496 -0.667875 Positive regulation of stress fiber assembly (1) 0.0
85 GO:0001823 -0.670272 Mesonephros development (1) 2.0
654 GO:0090398 -0.671529 Cellular senescence (1) 1.0
774 GO:0030216 -0.675144 Keratinocyte differentiation (1) 2.0
316 GO:0033157 -0.678469 Regulation of intracellular protein transport (1) 1.0
693 GO:1904646 -0.683366 Cellular response to amyloid-beta (1) 0.0
186 GO:2000278 -0.687722 Regulation of dna biosynthetic process (1) 1.0
418 GO:2001240 -0.689139 Negative regulation of extrinsic apoptotic signaling pathway in absence of ligand (1) 0.0
117 GO:0060562 -0.691717 Epithelial tube morphogenesis (1) 2.0
644 GO:0071230 -0.699092 Cellular response to amino acid stimulus (1) 1.0
298 GO:0006886 -0.703192 Intracellular protein transport (1) 3.0
473 GO:0008584 -0.709411 Male gonad development (1) 1.0
687 GO:0072384 -0.726414 Organelle transport along microtubule (1) 2.0
785 GO:0009266 -0.729968 Response to temperature stimulus (1) 2.0
116 GO:0002009 -0.733249 Morphogenesis of an epithelium (1) 3.0
867 GO:0045444 -0.735911 Fat cell differentiation (1) 2.0
564 GO:0045596 -0.736029 Negative regulation of cell differentiation (1) 2.0
804 GO:0030855 -0.743127 Epithelial cell differentiation (1) 3.0
887 GO:0034504 -0.747907 Protein localization to nucleus (1) 2.0
64 GO:0071456 -0.749105 Cellular response to hypoxia (1) 1.0
173 GO:0003300 -0.749363 Cardiac muscle hypertrophy (1) 2.0
422 GO:0046628 -0.749367 Positive regulation of insulin receptor signaling pathway (1) 0.0
783 GO:0008630 -0.759892 Intrinsic apoptotic signaling pathway in response to dna damage (1) 2.0
84 GO:0001822 -0.761027 Kidney development (1) 3.0
208 GO:0006325 -0.765908 Chromatin organization (1) 3.0
349 GO:0055118 -0.766833 Negative regulation of cardiac muscle contraction (1) 0.0
497 GO:0060041 -0.772237 Retina development in camera-type eye (1) 1.0
836 GO:1901987 -0.773018 Regulation of cell cycle phase transition (1) 3.0
850 GO:0070932 -0.775462 Histone h3 deacetylation (1) 0.0
522 GO:0048738 -0.777561 Cardiac muscle tissue development (1) 2.0
26 GO:1901990 -0.788841 Regulation of mitotic cell cycle phase transition (1) 2.0
606 GO:0048638 -0.795928 Regulation of developmental growth (1) 2.0
143 GO:0038096 -0.798475 Fc-gamma receptor signaling pathway involved in phagocytosis (1) 0.0
649 GO:0071478 -0.804830 Cellular response to radiation (1) 2.0
775 GO:0022612 -0.808633 Gland morphogenesis (1) 2.0
92 GO:0010718 -0.808642 Positive regulation of epithelial to mesenchymal transition (1) 1.0
534 GO:0030193 -0.809407 Regulation of blood coagulation (1) 2.0
185 GO:0051054 -0.817126 Positive regulation of dna metabolic process (1) 2.0
930 GO:1902074 -0.824018 Response to salt (1) 1.0
514 GO:0021575 -0.825947 Hindbrain morphogenesis (1) 1.0
876 GO:0046632 -0.839055 Alpha-beta t cell differentiation (1) 2.0
558 GO:0016032 -0.841189 Viral process (1) 3.0
490 GO:0007435 -0.854168 Salivary gland morphogenesis (1) 1.0
359 GO:0007010 -0.858980 Cytoskeleton organization (1) 4.0
718 GO:0008361 -0.861438 Regulation of cell size (1) 2.0
801 GO:0033143 -0.875603 Regulation of intracellular steroid hormone receptor signaling pathway (1) 1.0
179 GO:0006096 -0.888237 Glycolytic process (1) 1.0
157 GO:0048538 -0.899567 Thymus development (1) 0.0
886 GO:0034502 -0.908113 Protein localization to chromosome (1) 2.0
647 GO:0071392 -0.910313 Cellular response to estradiol stimulus (1) 0.0
457 GO:0035022 -0.913306 Positive regulation of rac protein signal transduction (1) 0.0
56 GO:0045668 -0.913637 Negative regulation of osteoblast differentiation (1) 0.0
843 GO:0014823 -0.918185 Response to activity (1) 1.0
376 GO:0007015 -0.919887 Actin filament organization (1) 3.0
166 GO:0030890 -0.921340 Positive regulation of b cell proliferation (1) 0.0
526 GO:0048743 -0.926052 Positive regulation of skeletal muscle fiber development (1) 0.0
21 GO:0051225 -0.929849 Spindle assembly (1) 2.0
596 GO:0060341 -0.932901 Regulation of cellular localization (1) 3.0
140 GO:0045321 -0.933341 Leukocyte activation (1) 5.0
855 GO:1903578 -0.938108 Regulation of atp metabolic process (1) 1.0
100 GO:0033138 -0.947197 Positive regulation of peptidyl-serine phosphorylation (1) 1.0
34 GO:1903146 -0.948778 Regulation of autophagy of mitochondrion (1) 1.0
120 GO:1905278 -0.954451 Positive regulation of epithelial tube formation (1) 0.0
726 GO:0065003 -0.957268 Protein-containing complex assembly (1) 4.0
330 GO:0071839 -0.961212 Apoptotic process in bone marrow cell (1) 0.0
902 GO:0046631 -0.968398 Alpha-beta t cell activation (1) 3.0
621 GO:0032410 -0.978890 Negative regulation of transporter activity (1) 1.0
17 GO:0000209 -0.995522 Protein polyubiquitination (1) 2.0
840 GO:0043154 -0.996503 Negative regulation of cysteine-type endopeptidase activity involved in apoptotic process (1) 1.0
35 GO:0061734 -0.998003 Parkin-mediated stimulation of mitophagy in response to mitochondrial depolarization (1) 0.0
387 GO:0051321 -1.010241 Meiotic cell cycle (1) 3.0
6 GO:0010971 -1.013845 Positive regulation of g2/m transition of mitotic cell cycle (1) 0.0
488 GO:0030878 -1.022587 Thyroid gland development (1) 0.0
598 GO:0043254 -1.024401 Regulation of protein-containing complex assembly (1) 3.0
161 GO:0030316 -1.031163 Osteoclast differentiation (1) 2.0
315 GO:0046902 -1.038293 Regulation of mitochondrial membrane permeability (1) 1.0
758 GO:0031099 -1.044014 Regeneration (1) 2.0
356 GO:0006996 -1.050513 Organelle organization (1) 5.0
547 GO:0032880 -1.061375 Regulation of protein localization (1) 4.0
470 GO:0042733 -1.062045 Embryonic digit morphogenesis (1) 0.0
590 GO:0050792 -1.063199 Regulation of viral process (1) 2.0
875 GO:0033077 -1.072052 T cell differentiation in thymus (1) 1.0
25 GO:0045930 -1.075304 Negative regulation of mitotic cell cycle (1) 2.0
293 GO:0032024 -1.085745 Positive regulation of insulin secretion (1) 1.0
305 GO:0034220 -1.105994 Ion transmembrane transport (1) 5.0
279 GO:0018205 -1.119880 Peptidyl-lysine modification (1) 4.0
300 GO:0032386 -1.120402 Regulation of intracellular transport (1) 2.0
19 GO:0031109 -1.121358 Microtubule polymerization or depolymerization (1) 2.0
302 GO:0006869 -1.132701 Lipid transport (1) 3.0
368 GO:0060271 -1.144707 Cilium assembly (1) 3.0
258 GO:0006694 -1.145934 Steroid biosynthetic process (1) 2.0
571 GO:2000010 -1.155468 Positive regulation of protein localization to cell surface (1) 0.0
880 GO:0031667 -1.156877 Response to nutrient levels (1) 4.0
220 GO:0016572 -1.176296 Histone phosphorylation (1) 1.0
805 GO:0090090 -1.179703 Negative regulation of canonical wnt signaling pathway (1) 0.0
856 GO:0019722 -1.180979 Calcium-mediated signaling (1) 2.0
481 GO:0030324 -1.181819 Lung development (1) 2.0
927 GO:0042632 -1.192404 Cholesterol homeostasis (1) 0.0
334 GO:2000811 -1.206493 Negative regulation of anoikis (1) 0.0
696 GO:0030336 -1.218248 Negative regulation of cell migration (1) 2.0
891 GO:0034405 -1.223979 Response to fluid shear stress (1) 1.0
61 GO:0001658 -1.225087 Branching involved in ureteric bud morphogenesis (1) 1.0
672 GO:0032091 -1.230462 Negative regulation of protein binding (1) 1.0
292 GO:0046883 -1.239220 Regulation of hormone secretion (1) 3.0
585 GO:0048589 -1.242926 Developmental growth (1) 4.0
634 GO:0061024 -1.243050 Membrane organization (1) 2.0
324 GO:0016241 -1.250511 Regulation of macroautophagy (1) 2.0
554 GO:0072659 -1.270281 Protein localization to plasma membrane (1) 2.0
476 GO:0048714 -1.271201 Positive regulation of oligodendrocyte differentiation (1) 0.0
603 GO:0061045 -1.272009 Negative regulation of wound healing (1) 2.0
917 GO:1900020 -1.277862 Positive regulation of protein kinase c activity (1) 0.0
407 GO:0030010 -1.292763 Establishment of cell polarity (1) 1.0
559 GO:0022414 -1.306268 Reproductive process (1) 4.0
199 GO:0006270 -1.310311 Dna replication initiation (1) 1.0
327 GO:0033028 -1.310645 Myeloid cell apoptotic process (1) 1.0
221 GO:0018105 -1.313496 Peptidyl-serine phosphorylation (1) 2.0
882 GO:0032147 -1.314908 Activation of protein kinase activity (1) 1.0
297 GO:0006839 -1.314960 Mitochondrial transport (1) 2.0
773 GO:0060612 -1.316095 Adipose tissue development (1) 1.0
815 GO:1901224 -1.319667 Positive regulation of nik/nf-kappab signaling (1) 0.0
551 GO:1903077 -1.319677 Negative regulation of protein localization to plasma membrane (1) 1.0
149 GO:0050853 -1.320551 B cell receptor signaling pathway (1) 1.0
807 GO:1901796 -1.330292 Regulation of signal transduction by p53 class mediator (1) 1.0
682 GO:0007026 -1.340212 Negative regulation of microtubule depolymerization (1) 0.0
515 GO:0021549 -1.341320 Cerebellum development (1) 2.0
798 GO:0051384 -1.346453 Response to glucocorticoid (1) 1.0
390 GO:0044770 -1.348006 Cell cycle phase transition (1) 4.0
198 GO:0006260 -1.359349 Dna replication (1) 3.0
614 GO:0035264 -1.365256 Multicellular organism growth (1) 1.0
5 GO:0000086 -1.366187 G2/m transition of mitotic cell cycle (1) 1.0
628 GO:0043467 -1.377923 Regulation of generation of precursor metabolites and energy (1) 2.0
616 GO:0001556 -1.391091 Oocyte maturation (1) 0.0
201 GO:0006310 -1.394633 Dna recombination (1) 3.0
243 GO:0006606 -1.399288 Protein import into nucleus (1) 1.0
104 GO:0006469 -1.403847 Negative regulation of protein kinase activity (1) 2.0
784 GO:0042771 -1.409468 Intrinsic apoptotic signaling pathway in response to dna damage by p53 class mediator (1) 1.0
204 GO:0006261 -1.419899 Dna-dependent dna replication (1) 2.0
2 GO:0000082 -1.425082 G1/s transition of mitotic cell cycle (1) 2.0
878 GO:0030593 -1.429383 Neutrophil chemotaxis (1) 1.0
839 GO:0031047 -1.438952 Gene silencing by rna (1) 2.0
15 GO:0070373 -1.439598 Negative regulation of erk1 and erk2 cascade (1) 0.0
367 GO:1901029 -1.442702 Negative regulation of mitochondrial outer membrane permeabilization involved in apoptotic signaling pathway (1) 0.0
385 GO:0060632 -1.444681 Regulation of microtubule-based movement (1) 1.0
285 GO:0051051 -1.447128 Negative regulation of transport (1) 3.0
1 GO:0045737 -1.452510 Positive regulation of cyclin-dependent protein serine/threonine kinase activity (1) 0.0
928 GO:0051453 -1.491236 Regulation of intracellular ph (1) 1.0
569 GO:0030307 -1.507144 Positive regulation of cell growth (1) 2.0
264 GO:0042177 -1.517738 Negative regulation of protein catabolic process (1) 1.0
442 GO:0008286 -1.524273 Insulin receptor signaling pathway (1) 1.0
524 GO:0007519 -1.531881 Skeletal muscle tissue development (1) 2.0
65 GO:0001701 -1.542908 In utero embryonic development (1) 2.0
174 GO:0010613 -1.546047 Positive regulation of cardiac muscle hypertrophy (1) 1.0
167 GO:0045637 -1.553460 Regulation of myeloid cell differentiation (1) 2.0
213 GO:0006396 -1.579762 Rna processing (1) 4.0
925 GO:1990403 -1.599465 Embryonic brain development (1) 0.0
62 GO:0001662 -1.634974 Behavioral fear response (1) 1.0
112 GO:0060789 -1.642170 Hair follicle placode formation (1) 0.0
321 GO:0010507 -1.662573 Negative regulation of autophagy (1) 1.0
193 GO:0006368 -1.663610 Transcription elongation from rna polymerase ii promoter (1) 1.0
910 GO:0051354 -1.665789 Negative regulation of oxidoreductase activity (1) 1.0
648 GO:0071549 -1.672280 Cellular response to dexamethasone stimulus (1) 0.0
240 GO:0006605 -1.701265 Protein targeting (1) 2.0
251 GO:0045833 -1.715199 Negative regulation of lipid metabolic process (1) 2.0
341 GO:2001236 -1.716145 Regulation of extrinsic apoptotic signaling pathway (1) 2.0
322 GO:0010508 -1.734826 Positive regulation of autophagy (1) 2.0
344 GO:1902236 -1.763757 Negative regulation of endoplasmic reticulum stress-induced intrinsic apoptotic signaling pathway (1) 0.0
828 GO:0010039 -1.764349 Response to iron ion (1) 1.0
577 GO:2001021 -1.770162 Negative regulation of response to dna damage stimulus (1) 1.0
342 GO:2001243 -1.793574 Negative regulation of intrinsic apoptotic signaling pathway (1) 2.0
830 GO:0071480 -1.811677 Cellular response to gamma radiation (1) 0.0
241 GO:0006612 -1.832631 Protein targeting to membrane (1) 1.0
550 GO:1904950 -1.845239 Negative regulation of establishment of protein localization (1) 2.0
586 GO:2000773 -1.847992 Negative regulation of cellular senescence (1) 0.0
33 GO:0000423 -1.852021 Mitophagy (1) 1.0
145 GO:0001782 -1.865421 B cell homeostasis (1) 0.0
527 GO:0007528 -1.888030 Neuromuscular junction development (1) 1.0
931 GO:0002931 -1.889223 Response to ischemia (1) 0.0
111 GO:0031069 -1.907723 Hair follicle morphogenesis (1) 0.0
575 GO:0030308 -1.913032 Negative regulation of cell growth (1) 1.0
393 GO:1901988 -1.952405 Negative regulation of cell cycle phase transition (1) 2.0
68 GO:0042659 -1.958074 Regulation of cell fate specification (1) 0.0
834 GO:0035195 -1.964770 Gene silencing by mirna (1) 1.0
271 GO:2000757 -1.982632 Negative regulation of peptidyl-lysine acetylation (1) 1.0
451 GO:0007263 -1.986508 Nitric oxide mediated signal transduction (1) 1.0
677 GO:0010977 -2.012737 Negative regulation of neuron projection development (1) 1.0
822 GO:0046898 -2.020251 Response to cycloheximide (1) 0.0
893 GO:0035094 -2.023491 Response to nicotine (1) 1.0
635 GO:0099173 -2.072765 Postsynapse organization (1) 2.0
884 GO:0032469 -2.081057 Endoplasmic reticulum calcium ion homeostasis (1) 1.0
538 GO:0048266 -2.139693 Behavioral response to pain (1) 0.0
787 GO:0042149 -2.188398 Cellular response to glucose starvation (1) 0.0
756 GO:0021695 -2.195335 Cerebellar cortex development (1) 1.0
662 GO:0031648 -2.195396 Protein destabilization (1) 0.0
727 GO:0070842 -2.214919 Aggresome assembly (1) 0.0
4 GO:2000134 -2.218448 Negative regulation of g1/s transition of mitotic cell cycle (1) 1.0
500 GO:0046666 -2.221412 Retinal cell programmed cell death (1) 0.0
151 GO:0006959 -2.228889 Humoral immune response (1) 2.0
216 GO:0045727 -2.232845 Positive regulation of translation (1) 1.0
274 GO:0034983 -2.241321 Peptidyl-lysine deacetylation (1) 0.0
565 GO:1902455 -2.310039 Negative regulation of stem cell population maintenance (1) 0.0
372 GO:0070584 -2.320318 Mitochondrion morphogenesis (1) 0.0
343 GO:1902166 -2.325709 Negative regulation of intrinsic apoptotic signaling pathway in response to dna damage by p53 class mediator (1) 0.0
512 GO:0008045 -2.344717 Motor neuron axon guidance (1) 1.0
769 GO:0010832 -2.404325 Negative regulation of myotube differentiation (1) 0.0
905 GO:0050864 -2.430784 Regulation of b cell activation (1) 2.0
338 GO:1900118 -2.496567 Negative regulation of execution phase of apoptosis (1) 0.0
426 GO:0090263 -2.498030 Positive regulation of canonical wnt signaling pathway (1) 0.0
540 GO:0007617 -2.520665 Mating behavior (1) 1.0
872 GO:0002326 -2.609504 B cell lineage commitment (1) 0.0
680 GO:0031640 -2.667289 Killing of cells of another organism (1) 1.0
278 GO:0016573 -2.686301 Histone acetylation (1) 3.0
156 GO:0048536 -2.714980 Spleen development (1) 0.0
308 GO:0051926 -2.798095 Negative regulation of calcium ion transport (1) 1.0
721 GO:1905710 -2.815392 Positive regulation of membrane permeability (1) 1.0
665 GO:0045636 -2.820949 Positive regulation of melanocyte differentiation (1) 0.0
719 GO:0043244 -2.878623 Regulation of protein-containing complex disassembly (1) 2.0
195 GO:0006360 -2.938382 Transcription by rna polymerase i (1) 2.0
22 GO:0007098 -3.141939 Centrosome cycle (1) 2.0
892 GO:0070059 -3.411097 Intrinsic apoptotic signaling pathway in response to endoplasmic reticulum stress (1) 1.0
189 GO:1903800 -3.468461 Positive regulation of production of mirnas involved in gene silencing by mirna (1) 0.0
844 GO:0043922 -3.856003 Negative regulation by host of viral transcription (1) 0.0
452 GO:0010750 -3.917943 Positive regulation of nitric oxide mediated signal transduction (1) 0.0
852 GO:0036289 -8.673968 Peptidyl-serine autophosphorylation (1) 0.0

Final model SVM

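Once the models have been cross-validated, we fit the final models using all annotated samples. Because no samples are held out here, the AUC, AUPR and precision reported below are computed on the training data itself.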

# Final per-GO-term SVM classifiers, fitted on every annotated drug.
# Each dict is keyed by GO term: in-sample AUC, AUPR, precision and the
# fitted model itself.
GO_terms_auc_svm_final = {}
GO_terms_aupr_svm_final = {}
GO_terms_precision_svm_final = {}
models_svm = {}

# Fit one RBF-kernel SVM per GO term
for goterm in sparseGO_terms:
    # Labels: the "<goterm>_1" row of slim_matrix, flattened to 1-D
    goterm_drugs = slim_matrix.loc[[f"{goterm}_1"]].values.flatten()

    # Skip GO terms whose labels sum to 8 or less
    if sum(goterm_drugs) <= 8:
        continue

    # Features: rows "<goterm>_1" ... "<goterm>_6" of the attribution table,
    # transposed so that each of those rows becomes a column
    list_nodes = [f"{goterm}_{i}" for i in range(1, 7)]
    score = attribution_data_annotated.loc[list_nodes].T

    # Scale every column by its own standard deviation; NaN results
    # (e.g. 0/0) are replaced by 0
    score_mod = (score / score.std()).fillna(0)

    # The final model is trained and scored on the same samples, so the
    # "test" names below are aliases of the training data
    X_train = X_test = score_mod
    y_train = y_test = goterm_drugs

    # Hyper-parameters (alternative kept from the original notebook:
    # gamma = 1/(X_train.shape[1]*X_train.to_numpy().var()))
    gamma = "scale"
    C = 1

    svm_model = svm.SVC(
        kernel='rbf',
        C=C,
        gamma=gamma,
        class_weight="balanced",
        tol=0.001,
        probability=True,
        random_state=1234,
    )
    svm_model.fit(X_train, y_train)

    # Hard class predictions and positive-class probabilities (Platt values);
    # svm_model.decision_function(X_test) was the alternative score
    y_pred = svm_model.predict(X_test)
    y_pred_proba = svm_model.predict_proba(X_test)[:, 1]

    # In-sample metrics
    GO_terms_auc_svm_final[goterm] = metrics.roc_auc_score(y_test, y_pred_proba)
    precision, recall, thresholds = metrics.precision_recall_curve(y_test, y_pred_proba)
    GO_terms_aupr_svm_final[goterm] = metrics.auc(recall, precision)
    GO_terms_precision_svm_final[goterm] = metrics.precision_score(y_test, y_pred)

    # Keep the fitted model for predicting new drugs
    models_svm[goterm] = svm_model

# Number of GO terms that received a final model
len(models_svm)
939

Final model AUC

# Tabulate the final-model AUC of every GO term (indexed by GO term), drop
# rows with a missing AUC, and display the table from highest to lowest AUC.
GO_terms_auc_svm_df_final = (
    pd.DataFrame(list(GO_terms_auc_svm_final.items()), columns=['goterm', 'auc'])
    .set_index("goterm")
    .dropna()
)
GO_terms_auc_svm_df_final.sort_values(by="auc", ascending=False)
auc
goterm
GO:0036289 1.000000
GO:0060440 0.998540
GO:0043162 0.995455
GO:0070059 0.994760
GO:0071364 0.994109
GO:1901029 0.994048
GO:0072384 0.993636
GO:0051453 0.993393
GO:0001556 0.991972
GO:0090201 0.991808
GO:0010750 0.990909
GO:0016573 0.990783
GO:1903800 0.990573
GO:1904950 0.989945
GO:1902455 0.989091
GO:0042149 0.987697
GO:0034983 0.987273
GO:1990403 0.985909
GO:0071353 0.985587
GO:0006275 0.984226
GO:0010971 0.984091
GO:0006869 0.983409
GO:0001779 0.983182
GO:0051973 0.981651
GO:0060749 0.980895
GO:0042771 0.980633
GO:0072655 0.980455
GO:0061734 0.980455
GO:0045636 0.980178
GO:0045737 0.980084
GO:1902236 0.979762
GO:0060632 0.979545
GO:0016575 0.978731
GO:0042659 0.977727
GO:0046628 0.977376
GO:1902042 0.977273
GO:0098780 0.975909
GO:0046902 0.975849
GO:0051607 0.975552
GO:0006401 0.974678
GO:0017157 0.974040
GO:0032740 0.973856
GO:0006270 0.973848
GO:0046666 0.973570
GO:0008045 0.972603
GO:0006303 0.972553
GO:0042177 0.972431
GO:0060020 0.972290
GO:0006360 0.972095
GO:2001021 0.971520
GO:0042733 0.971364
GO:0016572 0.971342
GO:0070932 0.970909
GO:2001257 0.970909
GO:0001782 0.970384
GO:0006261 0.970112
GO:1905564 0.969834
GO:2000757 0.969545
GO:0051354 0.969091
GO:0072284 0.969069
GO:0051926 0.968891
GO:0043407 0.968585
GO:0034394 0.968096
GO:0050870 0.967621
GO:0046898 0.967143
GO:0031047 0.967115
GO:0016925 0.966364
GO:0035790 0.966361
GO:0006417 0.965261
GO:0032469 0.965008
GO:0035195 0.964816
GO:0021782 0.964091
GO:0070584 0.963810
GO:0051384 0.961083
GO:0002326 0.960811
GO:2000773 0.960310
GO:0050729 0.959779
GO:0046942 0.959480
GO:0035249 0.959091
GO:0045821 0.958904
GO:0099111 0.958880
GO:0071670 0.958851
GO:0006367 0.958333
GO:1905278 0.958270
GO:0010559 0.957929
GO:0006959 0.957854
GO:0018205 0.957782
GO:0035860 0.957768
GO:0031640 0.957381
GO:0007059 0.957268
GO:0070373 0.956762
GO:0030282 0.956762
GO:0001658 0.956522
GO:0030890 0.956075
GO:0035754 0.955757
GO:0010832 0.955455
GO:0099173 0.955238
GO:0021695 0.955238
GO:0045727 0.955026
GO:0002862 0.954696
GO:0014827 0.954432
GO:0016579 0.953923
GO:0002718 0.953854
GO:0071320 0.953746
GO:0051281 0.953182
GO:0042552 0.953182
GO:0000086 0.953095
GO:0032147 0.952991
GO:0032436 0.952499
GO:0010592 0.952273
GO:0006694 0.951735
GO:0033141 0.951735
GO:0071480 0.951429
GO:0006612 0.951118
GO:0048011 0.950729
GO:1903077 0.950714
GO:0033619 0.950455
GO:0006352 0.950306
GO:0001662 0.950221
GO:0010039 0.950040
GO:0090314 0.949147
GO:0034502 0.949074
GO:0014823 0.948954
GO:2001240 0.948220
GO:0007617 0.948182
GO:0032743 0.947281
GO:0006310 0.947141
GO:0006605 0.946678
GO:0006975 0.946204
GO:2000739 0.946101
GO:1902459 0.945909
GO:0007626 0.945701
GO:0023019 0.945116
GO:0003376 0.944700
GO:0006576 0.944346
GO:0038007 0.943690
GO:0050728 0.943637
GO:0032922 0.942661
GO:0045740 0.942465
GO:1900118 0.942381
GO:0010952 0.942143
GO:1905710 0.942143
GO:1902166 0.942128
GO:0008637 0.941950
GO:2000010 0.941865
GO:0055118 0.941679
GO:0000423 0.941364
GO:0043154 0.941156
GO:0048701 0.940775
GO:0008210 0.940749
GO:1900272 0.940171
GO:0060997 0.939809
GO:0007263 0.939545
GO:2000379 0.939167
GO:1900020 0.939091
GO:0050896 0.938915
GO:0016485 0.938636
GO:0043966 0.938376
GO:0002437 0.938295
GO:2000300 0.937318
GO:0140013 0.937095
GO:0034767 0.936758
GO:0031648 0.936624
GO:0007026 0.936364
GO:0032024 0.936149
GO:0030193 0.936040
GO:0010212 0.935098
GO:0006457 0.934641
GO:0032729 0.934420
GO:0030593 0.934413
GO:0010575 0.934272
GO:0008064 0.933643
GO:0008286 0.932331
GO:0001818 0.932128
GO:0030513 0.931404
GO:0060766 0.931364
GO:0006396 0.931346
GO:0006919 0.931342
GO:0038096 0.930886
GO:0001553 0.930810
GO:0045580 0.930407
GO:0046326 0.930406
GO:0035025 0.930294
GO:1903146 0.929091
GO:0060444 0.929091
GO:0006412 0.928571
GO:0048536 0.928290
GO:0002819 0.927685
GO:0048704 0.927370
GO:0051054 0.927333
GO:0090184 0.927099
GO:1900006 0.926941
GO:2000134 0.926917
GO:0046889 0.926822
GO:0043123 0.926512
GO:0070842 0.926364
GO:0046329 0.926364
GO:0006898 0.925891
GO:0006368 0.925841
GO:1905897 0.925743
GO:0030048 0.925591
GO:0042180 0.925076
GO:0035909 0.924883
GO:0051209 0.924065
GO:0030308 0.923951
GO:0043170 0.923707
GO:0035726 0.922783
GO:0031663 0.922727
GO:0000209 0.922119
GO:0009165 0.921544
GO:0002720 0.921427
GO:0006096 0.921292
GO:1902036 0.921254
GO:0071549 0.921066
GO:0007528 0.920950
GO:0090090 0.920930
GO:0042472 0.920455
GO:0031056 0.920429
GO:0050864 0.920262
GO:0060789 0.920000
GO:0007389 0.919762
GO:0048743 0.919572
GO:0030705 0.919116
GO:0060179 0.919091
GO:0045739 0.918823
GO:0043627 0.917977
GO:0040018 0.917659
GO:2001243 0.917078
GO:0090037 0.917056
GO:0040016 0.915987
GO:0043552 0.915951
GO:0001666 0.915013
GO:0010508 0.914755
GO:0033690 0.914545
GO:0098586 0.914419
GO:0043922 0.914091
GO:0035994 0.914021
GO:0031398 0.913694
GO:0042093 0.913524
GO:0032410 0.913182
GO:1901224 0.913182
GO:0006839 0.913167
GO:0045907 0.912844
GO:2000278 0.912619
GO:2001236 0.912563
GO:0048170 0.912474
GO:0071839 0.912217
GO:0031507 0.911552
GO:0060391 0.911011
GO:0032148 0.910451
GO:0070102 0.910000
GO:0030878 0.909762
GO:0035162 0.909463
GO:0051225 0.909314
GO:0002931 0.909064
GO:0007411 0.908683
GO:0008625 0.908500
GO:0035788 0.908313
GO:0010921 0.907360
GO:0048266 0.906977
GO:0010977 0.906667
GO:0050910 0.906656
GO:0045732 0.906062
GO:0046620 0.905714
GO:0035855 0.905551
GO:0030316 0.905551
GO:0006469 0.905340
GO:0090263 0.905136
GO:0021953 0.904874
GO:0060312 0.904790
GO:0006260 0.904703
GO:0030521 0.904434
GO:0008016 0.904091
GO:0010727 0.904091
GO:0030509 0.904035
GO:0007498 0.903914
GO:0050769 0.903592
GO:0050792 0.903414
GO:0009582 0.903167
GO:0007098 0.902745
GO:0002821 0.902464
GO:0071276 0.902162
GO:0007286 0.901132
GO:0045088 0.900952
GO:0055003 0.900943
GO:0035767 0.900748
GO:0045987 0.900474
GO:0061029 0.900474
GO:0033327 0.900465
GO:0000422 0.900374
GO:0010976 0.900117
GO:0008354 0.899895
GO:0070528 0.899726
GO:0006807 0.899601
GO:0045833 0.899128
GO:1905065 0.898923
GO:0007018 0.898915
GO:0007422 0.898647
GO:0048484 0.898636
GO:0032467 0.898182
GO:0050795 0.897909
GO:0030539 0.897909
GO:0048538 0.897833
GO:0032355 0.897646
GO:0007416 0.897554
GO:0021575 0.897509
GO:0060348 0.897410
GO:0001569 0.897282
GO:0060384 0.897171
GO:0031069 0.897099
GO:0050918 0.897059
GO:0035584 0.896905
GO:0051046 0.896369
GO:0043129 0.896233
GO:0001843 0.896024
GO:0046330 0.895444
GO:0007030 0.895429
GO:0048873 0.895092
GO:0000724 0.894922
GO:0007202 0.894511
GO:1903053 0.894419
GO:0003338 0.894238
GO:1901990 0.894150
GO:0060644 0.893917
GO:0043161 0.893782
GO:0030838 0.892727
GO:0001946 0.892571
GO:0072210 0.892039
GO:0030101 0.892003
GO:0050731 0.892003
GO:0010613 0.891865
GO:0030325 0.891865
GO:0048714 0.891783
GO:0048008 0.891667
GO:0001823 0.890989
GO:0016239 0.890496
GO:0030216 0.890460
GO:0071300 0.890341
GO:0032008 0.889952
GO:0061045 0.889881
GO:0051894 0.889619
GO:0030010 0.889612
GO:0031016 0.889533
GO:0001942 0.889526
GO:1902533 0.889155
GO:0016358 0.888660
GO:0001501 0.888280
GO:0051092 0.888251
GO:0016601 0.887883
GO:0097067 0.887324
GO:0009306 0.887019
GO:0048167 0.886555
GO:0050921 0.886315
GO:1990384 0.886268
GO:0046883 0.886202
GO:0007519 0.886154
GO:0043270 0.885881
GO:0003007 0.885720
GO:0071900 0.885420
GO:0007585 0.885391
GO:2001214 0.885258
GO:0071456 0.884685
GO:0016567 0.884594
GO:0060740 0.882856
GO:0035094 0.882732
GO:0072073 0.882732
GO:0060612 0.881602
GO:0060325 0.881498
GO:0045668 0.881347
GO:0042531 0.881332
GO:0010038 0.881167
GO:0071333 0.880972
GO:0006939 0.880907
GO:0090141 0.880907
GO:0046718 0.880697
GO:0051770 0.880461
GO:0033627 0.880455
GO:0048149 0.880352
GO:0002685 0.880291
GO:0043029 0.880195
GO:0038033 0.879699
GO:0055119 0.879336
GO:0003300 0.878843
GO:0005984 0.878788
GO:0002218 0.878773
GO:0072239 0.878669
GO:0031103 0.878667
GO:0048557 0.878638
GO:1901987 0.878627
GO:0060048 0.877703
GO:0045637 0.877659
GO:2001234 0.877406
GO:0038083 0.876762
GO:0071277 0.876323
GO:0048839 0.876278
GO:0000723 0.875714
GO:0060627 0.875648
GO:0035022 0.874811
GO:0007435 0.874669
GO:2001241 0.874309
GO:0002062 0.874091
GO:0035234 0.873792
GO:0034976 0.873754
GO:0007584 0.872411
GO:0002318 0.872408
GO:0001975 0.872354
GO:0071230 0.871837
GO:0034446 0.871788
GO:0070933 0.871364
GO:0030072 0.871331
GO:0071897 0.871171
GO:0035733 0.870478
GO:0032967 0.870403
GO:0048675 0.870071
GO:0060571 0.870035
GO:0050920 0.869917
GO:0050678 0.869106
GO:0034405 0.869048
GO:0051150 0.868932
GO:0001934 0.868720
GO:0010507 0.868700
GO:1904707 0.868636
GO:0050821 0.868325
GO:0006811 0.868262
GO:0070588 0.868155
GO:0014911 0.867596
GO:0090280 0.867440
GO:0008630 0.867386
GO:1901796 0.867386
GO:0051056 0.867368
GO:0051321 0.865996
GO:0051051 0.865833
GO:0051902 0.865573
GO:0097009 0.865089
GO:0060271 0.865061
GO:0045930 0.864995
GO:0035304 0.864977
GO:0051899 0.864866
GO:0033028 0.864808
GO:0018108 0.864767
GO:1900087 0.864434
GO:0010467 0.863952
GO:0035019 0.863557
GO:0006687 0.863557
GO:0001824 0.863532
GO:0033689 0.863522
GO:0071392 0.863443
GO:0035264 0.863252
GO:0046632 0.862800
GO:0034605 0.862619
GO:0032091 0.862599
GO:0072659 0.862358
GO:0051901 0.861670
GO:0006357 0.861504
GO:0042475 0.861448
GO:0045747 0.861374
GO:0072006 0.860598
GO:0042220 0.860483
GO:0006937 0.860353
GO:0006511 0.860111
GO:0010718 0.859229
GO:0035924 0.859169
GO:0090398 0.859050
GO:0031532 0.858981
GO:1904062 0.858745
GO:2000251 0.858605
GO:0014068 0.858156
GO:0048146 0.858102
GO:0051090 0.857756
GO:0034765 0.857317
GO:0007229 0.856812
GO:0007158 0.856712
GO:1901031 0.856712
GO:0061351 0.856372
GO:1904019 0.856183
GO:0048812 0.856107
GO:0060437 0.855565
GO:0034766 0.854758
GO:0033143 0.854574
GO:0007269 0.854497
GO:0032516 0.854484
GO:0036120 0.854433
GO:0090068 0.853947
GO:0046854 0.853881
GO:0010811 0.853842
GO:0060976 0.853774
GO:0060045 0.853680
GO:0021549 0.853311
GO:0043534 0.853142
GO:0038084 0.853135
GO:0046427 0.852947
GO:0030324 0.852866
GO:0048010 0.852488
GO:0097193 0.852297
GO:0048286 0.852143
GO:0006468 0.851852
GO:0060326 0.851772
GO:0034097 0.851678
GO:0016071 0.851667
GO:0036324 0.851085
GO:1903010 0.851085
GO:0002327 0.850962
GO:0001570 0.850955
GO:0043536 0.850601
GO:0043406 0.850494
GO:0045347 0.850455
GO:0001701 0.850196
GO:0019222 0.849913
GO:0051403 0.849741
GO:0097021 0.849170
GO:0043467 0.848706
GO:0045766 0.848621
GO:0060562 0.848060
GO:0030001 0.847486
GO:0006810 0.847446
GO:0031667 0.847070
GO:0048565 0.846000
GO:0019827 0.845649
GO:0007565 0.845356
GO:0009966 0.844893
GO:0055085 0.844768
GO:0043114 0.844749
GO:0002548 0.844626
GO:2000377 0.844341
GO:0030198 0.844187
GO:0032386 0.844167
GO:0031929 0.844150
GO:0035306 0.843956
GO:0006897 0.843955
GO:0051301 0.843815
GO:0001656 0.843809
GO:0042060 0.843773
GO:0031109 0.843563
GO:0000122 0.843521
GO:0043124 0.843017
GO:0001837 0.842638
GO:1902275 0.841719
GO:0051261 0.841719
GO:0051924 0.841520
GO:0002250 0.841465
GO:0030336 0.841059
GO:0046631 0.840909
GO:0016055 0.840841
GO:0033077 0.840735
GO:0048741 0.840370
GO:0007266 0.839667
GO:0001938 0.838948
GO:0043586 0.838898
GO:0008277 0.837920
GO:0043303 0.837858
GO:0070662 0.837526
GO:0060374 0.836916
GO:0045087 0.836889
GO:0034220 0.836107
GO:0032388 0.835532
GO:0048568 0.835305
GO:0050866 0.835227
GO:0009058 0.834946
GO:1902074 0.834912
GO:0043244 0.834906
GO:0008542 0.834749
GO:0045055 0.834433
GO:0045444 0.834286
GO:0046578 0.834019
GO:0046777 0.833773
GO:0001889 0.833595
GO:0008584 0.833556
GO:0045840 0.833536
GO:0002366 0.833530
GO:0007049 0.833424
GO:0046474 0.833392
GO:0019233 0.833182
GO:0000165 0.832917
GO:0051258 0.832656
GO:0032956 0.832450
GO:0022612 0.832326
GO:0051050 0.832281
GO:0043392 0.831905
GO:0031274 0.831814
GO:0051702 0.831506
GO:0010564 0.831039
GO:0031099 0.830615
GO:1905563 0.830607
GO:0030318 0.830136
GO:0048598 0.829861
GO:0007165 0.829719
GO:1901988 0.829474
GO:0007186 0.829429
GO:0033157 0.829023
GO:0019221 0.829000
GO:0000278 0.828800
GO:0042310 0.828784
GO:1901300 0.828616
GO:0006909 0.828497
GO:0030154 0.828332
GO:0002573 0.827001
GO:0045429 0.826889
GO:0051223 0.826823
GO:0016570 0.826822
GO:0030163 0.826442
GO:0009791 0.826355
GO:0090630 0.826069
GO:0032409 0.825426
GO:0048477 0.824868
GO:0034644 0.824849
GO:0007346 0.824841
GO:0046651 0.824539
GO:0051171 0.823977
GO:0000302 0.823816
GO:0048608 0.823637
GO:0032940 0.823481
GO:0008610 0.823469
GO:0010628 0.823151
GO:1903078 0.822244
GO:0016032 0.821730
GO:0009888 0.821458
GO:0016042 0.821320
GO:0007259 0.820971
GO:0008544 0.820813
GO:0000077 0.820719
GO:0021766 0.820586
GO:0001817 0.819733
GO:0001932 0.819683
GO:0002053 0.819493
GO:0072593 0.819390
GO:0009887 0.819242
GO:0006753 0.818971
GO:0071383 0.818684
GO:0007015 0.818627
GO:0001819 0.818452
GO:0007275 0.818394
GO:1903829 0.818083
GO:0002244 0.818060
GO:0051898 0.817795
GO:0009410 0.817265
GO:0030335 0.817025
GO:0061024 0.816492
GO:0007173 0.816349
GO:0050900 0.816242
GO:0060395 0.815909
GO:0009755 0.815667
GO:0045860 0.815613
GO:0050872 0.815367
GO:0007612 0.814548
GO:0000082 0.814519
GO:0050852 0.814267
GO:0043408 0.813977
GO:0002009 0.813874
GO:0019752 0.813530
GO:0001822 0.813506
GO:0007179 0.813500
GO:0051049 0.813439
GO:0010033 0.813421
GO:1901135 0.813379
GO:1900180 0.813213
GO:0033554 0.813172
GO:0007204 0.813136
GO:0044770 0.812960
GO:0001755 0.812831
GO:0001541 0.812614
GO:0006470 0.811795
GO:0009743 0.811594
GO:0033993 0.811585
GO:0035265 0.811041
GO:0051496 0.811040
GO:0007162 0.810927
GO:0030218 0.809955
GO:0006139 0.809816
GO:0070374 0.808642
GO:0006298 0.808612
GO:0009056 0.808581
GO:0070507 0.808431
GO:0071363 0.808295
GO:0050680 0.808234
GO:0007169 0.807939
GO:0001894 0.807870
GO:0000902 0.806862
GO:0009617 0.806711
GO:1902904 0.806512
GO:0030097 0.806125
GO:0007399 0.805949
GO:0050853 0.805230
GO:0051726 0.804914
GO:0008360 0.804780
GO:0050863 0.804772
GO:0010629 0.804702
GO:0032880 0.804305
GO:0021795 0.804198
GO:0046488 0.804184
GO:0031032 0.804004
GO:0045595 0.803077
GO:0006936 0.802344
GO:0045793 0.802149
GO:0071222 0.801980
GO:0051897 0.801416
GO:0006606 0.800953
GO:0006886 0.800872
GO:0030307 0.800490
GO:0048738 0.800331
GO:0010821 0.800220
GO:0051247 0.800154
GO:0042752 0.800120
GO:0032835 0.800025
GO:0033138 0.799982
GO:1903578 0.799701
GO:0050673 0.798946
GO:0006997 0.798672
GO:0060341 0.798662
GO:0006281 0.798556
GO:0042391 0.798475
GO:0050808 0.797394
GO:0007267 0.797360
GO:0050865 0.797107
GO:0018105 0.797070
GO:0060560 0.796569
GO:0071478 0.796131
GO:0018107 0.796045
GO:0019216 0.795977
GO:0023061 0.795969
GO:0036473 0.795897
GO:0051147 0.795455
GO:0006996 0.794900
GO:0030217 0.794761
GO:0070527 0.794579
GO:0050804 0.793936
GO:0060021 0.793808
GO:0045321 0.793792
GO:0046034 0.792891
GO:1904646 0.792812
GO:0030182 0.792624
GO:0002764 0.790893
GO:0007596 0.790844
GO:0043542 0.790474
GO:0006355 0.790400
GO:0010638 0.790227
GO:0042110 0.789916
GO:2000811 0.789519
GO:0045785 0.789271
GO:0001952 0.789204
GO:0048709 0.787833
GO:0016192 0.787802
GO:0002320 0.787705
GO:0045944 0.787650
GO:0035051 0.787216
GO:0070663 0.786907
GO:0046486 0.786765
GO:0006914 0.786701
GO:0071407 0.786480
GO:0048468 0.786471
GO:0043065 0.786229
GO:1902532 0.786009
GO:0033044 0.785934
GO:0031333 0.785379
GO:0071417 0.785307
GO:0016241 0.785238
GO:0007268 0.785105
GO:0007010 0.785047
GO:0002443 0.783904
GO:2000270 0.783308
GO:0001764 0.782709
GO:0051174 0.781935
GO:0034329 0.781439
GO:0043549 0.781269
GO:0010595 0.781136
GO:2001020 0.780899
GO:0050776 0.780250
GO:0007159 0.780220
GO:0048041 0.780105
GO:0016236 0.779569
GO:0048638 0.778556
GO:0042551 0.778521
GO:0007517 0.778474
GO:0032869 0.777921
GO:0051649 0.777222
GO:0009725 0.777056
GO:0030855 0.776398
GO:0002040 0.776347
GO:0071310 0.775759
GO:0042063 0.775499
GO:0009266 0.775262
GO:0048469 0.774721
GO:0042307 0.774054
GO:0032879 0.772742
GO:0002376 0.772696
GO:0055082 0.772549
GO:0016070 0.772150
GO:0060840 0.771853
GO:0010632 0.771656
GO:0007219 0.771429
GO:0051341 0.770833
GO:0060416 0.770267
GO:0090050 0.770256
GO:0002274 0.770035
GO:0009968 0.768538
GO:0009416 0.768293
GO:0009653 0.767978
GO:0030183 0.767941
GO:0007507 0.766819
GO:0007283 0.766625
GO:0048589 0.766590
GO:0050790 0.766284
GO:0065003 0.765562
GO:0030032 0.765559
GO:0048103 0.765258
GO:0006954 0.764565
GO:0048878 0.764329
GO:0007420 0.764092
GO:0030168 0.762921
GO:0006629 0.761422
GO:0006644 0.760398
GO:0001525 0.760172
GO:0120035 0.759979
GO:0034103 0.759958
GO:0014070 0.759563
GO:0044255 0.758471
GO:0051098 0.758377
GO:0051641 0.757853
GO:0034599 0.756607
GO:0043473 0.756079
GO:0036092 0.755500
GO:0048863 0.755435
GO:2000352 0.754950
GO:0030162 0.754327
GO:0042325 0.754119
GO:0008202 0.754059
GO:0033628 0.753988
GO:0051146 0.753713
GO:0010243 0.753077
GO:0043524 0.752485
GO:0003014 0.752381
GO:0002684 0.752271
GO:0001763 0.751863
GO:0051145 0.751530
GO:0045596 0.750733
GO:0000226 0.750411
GO:0031175 0.749603
GO:0007155 0.749178
GO:0002064 0.748667
GO:0045597 0.748593
GO:0040008 0.748313
GO:0060485 0.746706
GO:0006508 0.746456
GO:0097191 0.746084
GO:0016477 0.745481
GO:0005975 0.745307
GO:0043066 0.745136
GO:0050890 0.744664
GO:0007265 0.744598
GO:0032092 0.743751
GO:0051017 0.743352
GO:0007005 0.742652
GO:0043434 0.742583
GO:0003158 0.742221
GO:0042113 0.742005
GO:0120162 0.741508
GO:0051881 0.740169
GO:0030522 0.740000
GO:0007160 0.739980
GO:0048511 0.737766
GO:0044281 0.736585
GO:0007568 0.736500
GO:0007610 0.735604
GO:0035556 0.734890
GO:0048017 0.734281
GO:0006325 0.734050
GO:0006915 0.733434
GO:0008284 0.731624
GO:0045165 0.731183
GO:0002682 0.730932
GO:0022414 0.730731
GO:0097190 0.730349
GO:0051494 0.729426
GO:0051128 0.728822
GO:0043254 0.728497
GO:0098609 0.727096
GO:0006338 0.726229
GO:0007423 0.725167
GO:0001649 0.721908
GO:0048661 0.721041
GO:0010941 0.719507
GO:1900407 0.716777
GO:0007166 0.714859
GO:0002683 0.708778
GO:1902903 0.708773
GO:0008285 0.705969
GO:0030900 0.704312
GO:0034504 0.704188
GO:0033365 0.703782
GO:0070997 0.703482
GO:0033043 0.697664
GO:0051249 0.687925
GO:0008104 0.675076
GO:0033002 0.627125
GO:0042593 0.364293
GO:0071887 0.359703
GO:0044262 0.349744
GO:0051640 0.348052
GO:0051000 0.299934
GO:0050778 0.290903
GO:0007156 0.278928
GO:0008361 0.265902
GO:0070301 0.262988
GO:0022407 0.258553
GO:0015031 0.257560
GO:0043525 0.248848
GO:0051353 0.242570
GO:0043086 0.239824
GO:0045471 0.229314
GO:0051497 0.208992
GO:0031529 0.208648
GO:0099504 0.206822
GO:0043547 0.194217
GO:1904659 0.191457
GO:0031334 0.184335
GO:0046677 0.183479
GO:0015980 0.180476
GO:0060291 0.175234
GO:0009259 0.166290
GO:0060173 0.162212
GO:0042632 0.145299
GO:0046890 0.144186
GO:0032760 0.142722
GO:0051302 0.135000
GO:0031295 0.134696
GO:0019318 0.123006
GO:0010951 0.120040
GO:0021987 0.119137
GO:0006163 0.118024
GO:0030041 0.107955
GO:0001892 0.106324
GO:0030512 0.105991
GO:0060079 0.105991
GO:0050770 0.098547
GO:0051928 0.097553
GO:0031397 0.094042
GO:0060041 0.082956
GO:0051047 0.076258
GO:0019722 0.041730
GO:0090042 0.035699
# Histogram of the final-model AUC across GO terms. Bins whose left edge is
# above 0.7 are highlighted, and the title shows the percentage of GO terms
# with AUC > 0.7.
sns.set(rc={'figure.figsize':(6,4)})
n_above = len(GO_terms_auc_svm_df_final[GO_terms_auc_svm_df_final["auc"]>0.7])
perc = str(round((100*n_above/len(GO_terms_auc_svm_df_final)),2))+"%"
N, bins, patches = plt.hist(GO_terms_auc_svm_df_final, color=CB_color_cycle[6],bins=50, linewidth=0.1)

# zip stops at the shorter sequence, so every bar is paired with its left edge
# (bins holds one more entry than patches)
for left_edge, patch in zip(bins, patches):
    if left_edge>0.7:
        patch.set_facecolor(CB_color_cycle[2])

# The values plotted are the AUCs of the final SVM models; the previous label
# ("logistic 1") was a leftover from a logistic-regression cell.
plt.xlabel("AUC (final SVM)", fontsize=16)
# Author's note: what works best is the plain sum of the attributions
plt.title(perc, fontsize=16)
Text(0.5, 1.0, '94.68%')

Final model AUPR

# Tabulate the final-model AUPR of every GO term (indexed by GO term), drop
# rows with a missing AUPR, and show the five highest values.
GO_terms_aupr_svm_df_final = (
    pd.DataFrame(list(GO_terms_aupr_svm_final.items()), columns=['goterm', 'aupr'])
    .set_index("goterm")
    .dropna()
)
GO_terms_aupr_svm_df_final.sort_values(by="aupr", ascending=False).head()
aupr
goterm
GO:0036289 1.000000
GO:0050896 0.995438
GO:0043170 0.989680
GO:0006807 0.987396
GO:0060440 0.978213
# NOTE (author): I have a problem with the recall
# Histogram of the final-model AUPR across GO terms. Bins whose left edge is
# above 0.7 are highlighted, and the title shows the percentage of GO terms
# with AUPR > 0.7.
sns.set(rc={'figure.figsize':(5,3)})
above_threshold = GO_terms_aupr_svm_df_final[GO_terms_aupr_svm_df_final["aupr"]>0.7]
perc = str(round((100*len(above_threshold)/len(GO_terms_aupr_svm_df_final)),2))+"%"
N, bins, patches = plt.hist(GO_terms_aupr_svm_df_final, color=CB_color_cycle[6],bins=50, linewidth=0.1)

# Pair each bar with its left bin edge (zip ignores the extra, last edge)
for left_edge, patch in zip(bins, patches):
    if left_edge>0.7:
        patch.set_facecolor(CB_color_cycle[3])

plt.xlabel("AUPR", fontsize=16)
plt.title(perc, fontsize=16)
Text(0.5, 1.0, '20.02%')

Predict for a new drug

Make predictions

# Drugs present in the full attribution table but absent from the annotated
# one. Sorted so that the processing order (and therefore the column order of
# the prediction tables built from this list) is the same on every run; plain
# set iteration order can change between interpreter sessions.
unknown = sorted(set(attribution_data_all.columns)-set(attribution_data_annotated.columns))

Get the probabilities for all unknown drugs

# Score every unknown drug with each final per-GO-term SVM.
# Results:
#   probabilities_unknown - GO terms x drugs, positive-class probability
#   preds_unknown         - GO terms x drugs, hard class prediction
predictions = {}
distances = {}  # only needed by the disabled decision_function variant below

# The feature names and the training-set standard deviations depend only on
# the GO term, so compute them once instead of once per (drug, GO term) pair.
goterm_features = {}
for goterm, model in models_svm.items():
    # Feature names stored in the model are the attribution rows we need
    list_nodes = list(model.feature_names_in_)
    # std of each neuron over the drugs that trained the models
    train_std = attribution_data_annotated.loc[list_nodes].T.std()
    goterm_features[goterm] = (list_nodes, train_std)

drug_prob_frames = []
drug_pred_frames = []

for drug in unknown:
    # Fresh containers per drug so no value can leak from the previous drug
    predictions = {}
    probabilities = {}
    for goterm, model in models_svm.items():
        list_nodes, train_std = goterm_features[goterm]

        # One-row frame holding this drug's attributions for the model features
        score = attribution_data_all.loc[list_nodes][drug].to_frame().T
        # Same scaling as in training; NaN results are replaced by 0
        score_mod = score.divide(train_std).fillna(0)

        predictions[goterm] = model.predict(score_mod)
        probabilities[goterm] = model.predict_proba(score_mod)[::,1]  # platt values
        # distances[goterm] = model.decision_function(score_mod)

    # One column per drug, one row per GO term
    drug_probs = pd.DataFrame.from_dict(probabilities).T
    drug_probs.columns = [drug]
    drug_preds = pd.DataFrame.from_dict(predictions).T
    drug_preds.columns = [drug]
    drug_prob_frames.append(drug_probs)
    drug_pred_frames.append(drug_preds)
    print(drug)

# Concatenate once at the end instead of re-copying the growing table on every
# iteration. The leading empty frame keeps the result an empty DataFrame when
# there are no unknown drugs.
probabilities_unknown = pd.concat([pd.DataFrame(), *drug_prob_frames], axis=1)
preds_unknown = pd.concat([pd.DataFrame(), *drug_pred_frames], axis=1)
brd-k19103580-001-01-2
nvp-bhg712
wh-4-023
pd173074
cbpnzqvsjqdfbe-rerlvdevsa-n
n-(2,5-dimethoxyphenyl)sulfonyl-n-(4-methoxyphenyl)benzamide
brd-k33514849-001-01-9
chembl3182697
dfsdbfjuwanyes-ubwkhrtasa-n
nvp-adw742
sb 225002
schembl10436373
stf-62247
zm-447439
gsk269962a
schembl12469828
schembl2139153
azd7545
mira-1
wee1 inhibitor
achp
chembl2203525
brd-k49290616-001-01-9
nvp-231
pha-665752
jq1
n-[(2r,3s)-2-[[cyclopropylmethyl(methyl)amino]methyl]-5-[(2r)-1-hydroxypropan-2-yl]-3-methyl-6-oxo-3,4-dihydro-2h-1,5-benzoxazocin-8-yl]-1-methyl-4-imidazolesulfonamide
otkwubxkthwzke-fuopvmcbsa-n
bms-345541;cc1=cc2=c(c=c1)n=c(c3=nc=c(n23)c)nccn.cl
cid5951923
ml-030
cct036477
pluripotin
nutlin-3a
skepinone-l
gsk-j4
(-)-rapamycin
hhdwuyjenprcsp-uttpphfysa-n
r406 (free base)
n-[[(4r,5r)-2-[(2r)-1-hydroxypropan-2-yl]-4-methyl-8-(4-methylpent-1-ynyl)-1,1-dioxo-4,5-dihydro-3h-6,1$l^{6},2-benzoxathiazocin-5-yl]methyl]-n-methyl-2-pyrazinecarboxamide
plx-4720
chebi:119735
pdk1 inhibitor
pf-573228
jq1 + mk-0752
cay10594
fti-277
gw843682x
sz4ta2
bix 02189
chembl2180739
chm-1
s-trityl-l-cysteine
qs11
hg6-64-1
lomeguatrib
rad51 inhibitor b02
bms-509744
bms614
brd-k09587429-001-01-3
gsk429286a
bx-912
fawugygebhaqbu-ppexnqrjsa-n
schembl2066172
schembl618594
ethyl 5,5,7,7-tetramethyl-2-(5-nitrothiophene-2-carboxamido)-4,5,6,7-tetrahydrothieno[2,3-c]pyridine-3-carboxylate
ar-42
fttyfnwrwdlflp-uhfffaoysa-n
n'-(2-pyrrolylidenemethyl)-2-(2,4,6-trichlorophenoxy)acetohydrazide
methylstat
nsc87877
opahmanwvumwaw-ghfzsmqjsa-n
wp-1130
dacarbazine
schembl6874948
mk-0752
brd-k35716340-001-01-2
brd-a63646118-001-02-6
brd-k62801835-001-01-0
shikonin
nsc 23766
selisistat
brd-k50799972-001-01-3
homoharringtonine
unc0321
sr8278
erastin
schembl12180851
schembl1710881
brd-k17060750-001-01-0
dbeq
jq1 + schembl2671349
mnulegdcpyonbu-pamdcedjsa-n
schembl13833463
gdc-0879
mdivi-1
chembl515416
brd-k53792571-003-01-6
n-[(2r,3r)-5-[(2s)-1-hydroxypropan-2-yl]-3-methyl-2-[[methyl-[(1-naphthalenylamino)-oxomethyl]amino]methyl]-6-oxo-3,4-dihydro-2h-1,5-benzoxazocin-10-yl]-4-pyridinecarboxamide
n-[[(2s,3s)-8-[2-(1-hydroxycyclopentyl)ethynyl]-5-[(2s)-1-hydroxypropan-2-yl]-3-methyl-6-oxo-3,4-dihydro-2h-pyrido[2,3-b][1,5]oxazocin-2-yl]methyl]-n-methyl-4-oxanecarboxamide
gsk4112
mkwlqyduwjbeku-lwsjdiafsa-n
chembl3183639
khs101
ch 55
narciclasine
betulinic acid
sb-590885
brd-k20514654-001-01-8
dacinostat
unc0638
nsc207895
tw-37
11-cis retinoic acid
bibr 1532
dichloroplatinum diammoniate
5-azacytidine
gqrreykspjmlaw-ygnumjmvsa-n
chembl2058177
c6 ceramide
camptothecin
schembl12474870
agk2
ski ii
chembl24850
icg-001
n-[(2s,3s)-2-[(dimethylamino)methyl]-5-[(2r)-1-hydroxypropan-2-yl]-3-methyl-6-oxo-3,4-dihydro-2h-1,5-benzoxazocin-10-yl]-2,5-dimethyl-3-pyrazolecarboxamide
vx-11e
cct 018159
wfygwjxipugujf-uhfffaoysa-n
mls000571394
aacocf3
naphtho(2,1-b)furan, 1-methyl-2-nitro-
n-[(2s,3s)-5-[(2r)-1-hydroxypropan-2-yl]-3-methyl-2-[[methyl-[(1-naphthalenylamino)-oxomethyl]amino]methyl]-6-oxo-2,3,4,7-tetrahydro-1,5-benzoxazonin-9-yl]-4-pyridinecarboxamide
brd-k25737009-001-01-2
jnk inhibitor viii
spautin-1
ic-87114
sr1001
16beta-bromoandrosterone
schembl2586580
nvp-tae684
cgp-60474
jw 480
fhtvasvneuemiv-lwsjdiafsa-n
ethyl 4-[4-[(5-nitrofuran-2-yl)methylidene]-3,5-dioxopyrazolidin-1-yl]benzoate
dmog
schembl15422095
chembl2132053
brd-k02251932-001-01-3
bms-536924;cc1=cc(=cc2=c1n/c(=c\3/c(=cc=nc3=o)nc[c@h](c4=cc(=cc=c4)cl)o)/n2)n5ccocc5
nsc60043
ng25
cid-2858522
schembl13737661
ikk-3 inhibitor
brd7880
smer-3
cx-5461
am580
elesclomol
chembl2381520
hc-067047
schembl17821363
niclosamide
n-[(2r,3s)-5-[(2r)-1-hydroxypropan-2-yl]-3-methyl-2-(methylaminomethyl)-6-oxo-2,3,4,7-tetrahydro-1,5-benzoxazonin-9-yl]methanesulfonamide
thapsigargin
brd1172
schembl1914213
i-bet-762
brd-k58306044-001-01-3
a-770041
chembl585951
glutaminase c-in-1
ql47
chembl2143553
cytochalasin b
n1-[2-(1h-indol-3-yl)ethyl]-n3-pyridin-4-ylbenzene-1,3-diamine
curcumin, curcuma longa l.
pifithrin
schembl16479156
chlorambucil
schembl15444220
zinc113660258
yk-4-279
pci-34051
fh535
schembl916391
n-(2-(4-(2-oxo-2,3-dihydro-1h-benzo[d]imidazol-1-yl)piperidin-1-yl)ethyl)-2-naphthamide
brd-k30019337-001-01-1
nvp-bsk805
chembl2356172
unc-0638 + schembl2671349
ki8751
az3146
isx-9
schembl13741284
ku-0063794
sb-431542
tpca-1
pdipalloxofubu-uhfffaoysa-n
(-)-epigallocatechin gallate
bleomycin
rsk inhibitor fmk
n9-isopropyl-olomoucine
mgcd-265
ml031
mg-132
n-[3-(1h-benzimidazol-2-yl)-5-(1-piperazinylmethyl)phenyl]-2-quinoxalinecarboxamide
le 135
phloretin
schembl2085358
abt-737
jw74
pf 750
chembl436817
cp-466722
myricetin
kcbbhekxehmwfw-yqzfvpmhsa-n
gw-405833
brd-6929
brd-k52037352-001-01-6
stemregenin 1
iu-1
apicidin
spox1_002925
brd6708
darinaparsin
wpttvjltnawyao-cdypjpissa-n
mi-2
ak174031
chebi:94975
nsc373989
snx-2112
ac-55649
gsk-650394
cyclopamine
o6-benzylguanine
pd153035
ipa-3
chir-99021
n-[[(4s,5r)-8-[2-(2-fluorophenyl)ethynyl]-2-[(2s)-1-hydroxypropan-2-yl]-4-methyl-1,1-dioxo-4,5-dihydro-3h-6,1$l^{6},2-benzoxathiazocin-5-yl]methyl]-n-methyl-2-pyridin-4-ylacetamide
su11274
chebi:94110
brd-k49456190-001-01-0
cct007093
schembl18216694
n-[(2r,3s)-5-[(2s)-1-hydroxypropan-2-yl]-3-methyl-2-(methylaminomethyl)-6-oxo-3,4-dihydro-2h-1,5-benzoxazocin-8-yl]cyclohexanecarboxamide
schembl18188080
cdk9 inhibitor
(s)-selisistat
parthenolide;c/c/1=c\cc[c@@]2([c@h](o2)[c@@h]3[c@@h](cc1)c(=c)c(=o)o3)c
marinopyrrole a
brd-a34462049-001-01-0
etp-46464
lsm-6185
bx-795
nu-7441
n-[(2s,3s,6r)-2-(hydroxymethyl)-6-[2-(4-methyl-1-piperazinyl)-2-oxoethyl]-3-oxanyl]-1,3-benzodioxole-5-carboxamide
xhqlywyicdktpj-uhfffaoysa-n
cay10603
nelarabine
parbendazole
brd-k33199242-001-01-2
mln2480
lsm-13729
tubastatin a
kpt-185
bai1
ci 976
importazole
brd-k04800985-001-01-1
gw 441756
schembl16273428
mitomycin c
i-bet151
as-605240
pd318088
cil56
brd-k02492147-001-01-4
azd1152-hqpa
brd-k27986637-001-01-3
ethyl 2-cyano-3-(3,4-dichlorophenyl)acryloylcarbamate
tcmdc-125552
bms-345541;cc1=cc2=c(c=c1)n=c(c3=nc=c(n23)c)nccn
bay 61-3606 + hydrochloric acid
bryostatin 1
bam7
palmostatin b
serdemetan
jq1 + unc0638
n-[2-methyl-5-[oxo-[3-(1-oxoprop-2-enylamino)-5-(trifluoromethyl)anilino]methyl]phenyl]-5-isoxazolecarboxamide
oprea1_718426
tipifarnib (s enantiomer)
salermide
sch-529074
pf-543
sb-525334
ro-3306
pf-4708671
gsk461364
pf 184
lrlwxbhfpgsuox-hhkxydnmsa-n
bendamustine
chembl416418
pik-93
chembl568305
chembl3188232
ku-55933
6-bio
ku-60019
schembl16296919
ak174031 + mk-1775
ml311
schembl18426910
vaf347
bms-536924;cc1=cc(=cc2=c1nc(=c3c(=cc=nc3=o)nc[c@h](c4=cc(=cc=c4)cl)o)n2)n5ccocc5
sl-0101
mps1-in-1 + hydrochloric acid
nsc 95397
lfm-a13
schembl16046542
telomerase inhibitor ix
mls000106215
procarbazine
gnf-2
fqi1
brd-a28105619-001-01-3
ly2183240
embelin
mtlmdzjugdutcp-ywefrbeisa-n
cot inhibitor-2
isoliquiritigenin
n-[3-[[2-[[4-(dimethylamino)cyclohexyl]amino]-9-propan-2-yl-6-purinyl]amino]phenyl]-2-propenamide
bms270394
nsc74859
austocystin d
schembl15428380
ku-0060648
dasa-58
n-[(2r,3s)-5-[(2r)-1-hydroxypropan-2-yl]-3-methyl-2-(methylaminomethyl)-6-oxo-2,3,4,7-tetrahydro-1,5-benzoxazonin-9-yl]-3-(4-morpholinyl)propanamide
brd-a59431241-001-01-1
whi-p97
brd-a15100685-001-01-8
schembl4463213
osi-027;coc1=cc=cc2=c/c(=c/3\c4=c(n=cnn4c(=n3)c5ccc(cc5)c(=o)o)n)/n=c21
aica ribonucleotide
n-[(2s,3s)-2-[[[(cyclohexylamino)-oxomethyl]-methylamino]methyl]-5-[(2r)-1-hydroxypropan-2-yl]-3-methyl-6-oxo-3,4-dihydro-2h-1,5-benzoxazocin-8-yl]-2-(1-methyl-3-indolyl)acetamide
chs-828
ifosfamide
sepantronium + bromide
bleomycin sulfate
gsk1904529a
ouabain
n-[(2r,3s)-2-[[(4-chlorophenyl)sulfonyl-methylamino]methyl]-5-[(2s)-1-hydroxypropan-2-yl]-3-methyl-6-oxo-2,3,4,7-tetrahydro-1,5-benzoxazonin-9-yl]-4,4,4-trifluorobutanamide
1009820-21-6
azanide; dichloroplatinum(2+)
brd3308
leptomycin b
as601245
erk5-in-1
phenformin
rg108
at7867
chebi:93385
tcmdc-123515
stf-31
chembl2062550
n-[2-methyl-5-[2-oxo-9-(1h-pyrazol-4-yl)-1-benzo[h][1,6]naphthyridinyl]phenyl]-2-propenamide
brd-k16147474-001-01-1
ubrvgbldxdoetm-uhfffaoysa-n
qfjcirlumzquot-laoshscvsa-n
ml-210
necrosulfonamide
a-804598
schembl15422028
schembl14934014
cdk4/6 inhibitor iv
srlvtmsbrcmody-qxpfvdmisa-n
j3.559.058g
tgx-221
gw2580
schembl6465274
nsc136476
c646
mls001198989
hms1361j12
ciclopirox
schembl10183194
salubrinal
ko 143
z-llnle-cho
lsm-6189
temozolomide
chembl258148
n-(4-methoxyphenyl)sulfonyl-n-[2-[2-(1-oxido-4-pyridin-1-iumyl)ethenyl]phenyl]acetamide
necrostatin-1
n'-[(6-oxo-5-prop-2-enyl-1-cyclohexa-2,4-dienylidene)methyl]-2-[4-(phenylmethyl)-1-piperazinyl]acetohydrazide;c=ccc1=cc=cc(=cnnc(=o)cn2ccn(cc2)cc3=cc=cc=c3)c1=o
nsc48300
lrrk2-in-1
t0901317
n-cyclopropyl-3-[3-[[cyclopropyl(oxo)methyl]amino]-1h-indazol-6-yl]benzamide
jw-55
pf-4800567
az-628
fr-180204
wz4002
schembl12182311
brd4770
brd-k41597374-001-01-7
eht-1864
n'-[(6-oxo-5-prop-2-enyl-1-cyclohexa-2,4-dienylidene)methyl]-2-[4-(phenylmethyl)-1-piperazinyl]acetohydrazide;c=ccc1=cc=c/c(=c/nnc(=o)cn2ccn(cc2)cc3=cc=cc=c3)/c1=o
wz8040
ahpn
chembl2152368
epz004777 + schembl2671349
nan + nan
schembl4320913
lsm-36779
brd-k29086754-001-01-7
a-443654
sepantronium
isoevodiamine
retinol + schembl2671349
sch-529074 + jnj-26854165
agwauacrbaqpjj-uhfffaoysa-n
chembl520231
schembl11942935
chembl3185999
n-methyl-n-[4-[[6-[[1-(1-oxoprop-2-enyl)-3-piperidinyl]amino]-7h-purin-2-yl]amino]phenyl]propanamide
parthenolide;c/c/1=c/cc[c@@]2([c@h](o2)[c@@h]3[c@@h](cc1)c(=c)c(=o)o3)c
ym-201636
schembl2671349
smr000198998
rigosertib
dqnfqthsdkxsee-qfzqxzrasa-n
cbb1007
oqhlpaawwgdxaw-uhfffaoysa-n
daporinad
cyclophosphamide
mi-1
schembl12041987
chembl2206358
brd-1240
isonicotinohydroxamic acid
rsl3
schembl13833318
1,2-cyclohexanediamine anion + oxalic acid
sb-216763
brd-k05870596-001-01-4
dnmdp-2
n-[6-(2-amino-4-fluoroanilino)-6-oxohexyl]-4-methylbenzamide
chembl2398212
akt inhibitor viii

Study drug with unknown MOA

Choose drug with unknown MOA…

# Interactive selector for a drug with unknown MoA (`f` and `unknown` are
# defined earlier in the notebook).
combobox_u = interactive(f, drug=widgets.Combobox(options=unknown))

# `real_go_info.GO_term` is matched against the GO terms of `platt_matrix`
# with a "_1" suffix appended, so build those suffixed node names first.
predictions_nodes = [f"{goterm}_1" for goterm in platt_matrix.index]

# Add names to GO terms. Take an explicit copy so the suffix strip below
# writes to an independent frame rather than to a filtered slice of
# `real_go_info` (which triggers pandas' SettingWithCopyWarning).
real_go_info_svm = real_go_info[real_go_info.GO_term.isin(predictions_nodes)].copy()
# Remove only the trailing "_1" marker, never a "_1" elsewhere in the string.
real_go_info_svm["GO_term"] = real_go_info_svm["GO_term"].str.replace(r"_1$", "", regex=True)

display(combobox_u)
# NOTE(review): `.result` is read immediately after the widget is displayed,
# so it presumably reflects the selection at the time this line runs; re-run
# from here after choosing a drug - confirm against the ipywidgets docs.
selected_drug_u_name = combobox_u.result

# Binary predictions for the selected drug, one row per GO term.
predictions_df = pd.DataFrame(preds_unknown.loc[:, selected_drug_u_name]).reset_index()
predictions_df.columns = ["GO_term", "predictions"]

# Predicted probabilities for the selected drug, one row per GO term.
probabilities_df = pd.DataFrame(probabilities_unknown.loc[:, selected_drug_u_name]).reset_index()
probabilities_df.columns = ["GO_term", "probability"]

# Attach the GO-term information and the binary predictions.
probabilities_df = probabilities_df.merge(real_go_info_svm, on="GO_term")
probabilities_df = probabilities_df.merge(predictions_df, on="GO_term")

# Show the 200 most probable GO terms with layer_number <= 3.
probabilities_df.loc[probabilities_df["layer_number"] <= 3].sort_values(by=["probability"], ascending=False).head(200)
GO_term probability Name layer_number predictions
820 GO:0033993 0.809556 Response to lipid (1) 3.0 1.0
223 GO:0018108 0.783375 Peptidyl-tyrosine phosphorylation (1) 3.0 1.0
573 GO:0010629 0.742615 Negative regulation of gene expression (1) 3.0 1.0
106 GO:0071900 0.725062 Regulation of protein serine/threonine kinase activity (1) 2.0 1.0
624 GO:0010628 0.702924 Positive regulation of gene expression (1) 3.0 1.0
74 GO:0001817 0.687600 Regulation of cytokine production (1) 3.0 1.0
44 GO:0048812 0.672129 Neuron projection morphogenesis (1) 3.0 1.0
224 GO:0046777 0.661031 Protein autophosphorylation (1) 1.0 1.0
99 GO:0001934 0.658374 Positive regulation of protein phosphorylation (1) 3.0 1.0
570 GO:0045597 0.628072 Positive regulation of cell differentiation (1) 3.0 1.0
839 GO:0031047 0.553888 Gene silencing by rna (1) 2.0 1.0
100 GO:0033138 0.542770 Positive regulation of peptidyl-serine phosphorylation (1) 1.0 1.0
888 GO:0034976 0.540483 Response to endoplasmic reticulum stress (1) 3.0 1.0
633 GO:0051301 0.535293 Cell division (1) 2.0 1.0
821 GO:0034097 0.533600 Response to cytokine (1) 3.0 1.0
423 GO:1902533 0.530199 Positive regulation of intracellular signal transduction (1) 2.0 1.0
596 GO:0060341 0.523390 Regulation of cellular localization (1) 3.0 0.0
729 GO:0120035 0.514954 Regulation of plasma membrane bounded cell projection organization (1) 3.0 1.0
568 GO:0008284 0.513916 Positive regulation of cell population proliferation (1) 2.0 1.0
558 GO:0016032 0.500000 Viral process (1) 3.0 1.0
641 GO:0071417 0.494543 Cellular response to organonitrogen compound (1) 3.0 1.0
353 GO:0006954 0.482633 Inflammatory response (1) 3.0 1.0
9 GO:0043408 0.476159 Regulation of mapk cascade (1) 2.0 1.0
808 GO:1902532 0.449154 Negative regulation of intracellular signal transduction (1) 3.0 1.0
11 GO:0043406 0.445845 Positive regulation of map kinase activity (1) 1.0 1.0
8 GO:0000165 0.443783 Mapk cascade (1) 3.0 0.0
358 GO:0007005 0.427023 Mitochondrion organization (1) 3.0 1.0
134 GO:0002366 0.423235 Leukocyte activation involved in immune response (1) 3.0 1.0
221 GO:0018105 0.422261 Peptidyl-serine phosphorylation (1) 2.0 0.0
285 GO:0051051 0.421500 Negative regulation of transport (1) 3.0 1.0
191 GO:0045944 0.420524 Positive regulation of transcription by rna polymerase ii (1) 2.0 1.0
654 GO:0090398 0.416328 Cellular senescence (1) 1.0 1.0
847 GO:0045055 0.414665 Regulated exocytosis (1) 2.0 1.0
342 GO:2001243 0.411574 Negative regulation of intrinsic apoptotic signaling pathway (1) 2.0 1.0
510 GO:0042063 0.408279 Gliogenesis (1) 3.0 1.0
788 GO:0009410 0.407937 Response to xenobiotic stimulus (1) 2.0 1.0
824 GO:0071363 0.407544 Cellular response to growth factor stimulus (1) 3.0 1.0
496 GO:0048608 0.406699 Reproductive structure development (1) 2.0 1.0
528 GO:0007565 0.406022 Female pregnancy (1) 2.0 1.0
896 GO:0097193 0.400210 Intrinsic apoptotic signaling pathway (1) 3.0 1.0
76 GO:0001819 0.388417 Positive regulation of cytokine production (1) 2.0 1.0
48 GO:0001525 0.386728 Angiogenesis (1) 2.0 1.0
300 GO:0032386 0.384834 Regulation of intracellular transport (1) 2.0 1.0
906 GO:0043549 0.378711 Regulation of kinase activity (1) 3.0 0.0
662 GO:0031648 0.377411 Protein destabilization (1) 0.0 1.0
516 GO:0007423 0.372363 Sensory organ development (1) 3.0 1.0
461 GO:0050804 0.367884 Modulation of chemical synaptic transmission (1) 3.0 1.0
104 GO:0006469 0.364467 Negative regulation of protein kinase activity (1) 2.0 1.0
620 GO:0051098 0.363823 Regulation of binding (1) 3.0 1.0
86 GO:0072006 0.358531 Nephron development (1) 2.0 1.0
473 GO:0008584 0.350294 Male gonad development (1) 1.0 1.0
536 GO:0007610 0.346612 Behavior (1) 3.0 0.0
693 GO:1904646 0.344997 Cellular response to amyloid-beta (1) 0.0 1.0
376 GO:0007015 0.344878 Actin filament organization (1) 3.0 1.0
552 GO:0033365 0.343027 Protein localization to organelle (1) 3.0 1.0
774 GO:0030216 0.342203 Keratinocyte differentiation (1) 2.0 1.0
639 GO:0060326 0.341919 Cell chemotaxis (1) 2.0 1.0
154 GO:0050778 0.335649 Positive regulation of immune response (1) 3.0 0.0
480 GO:0048565 0.333289 Digestive tract development (1) 1.0 1.0
794 GO:0043434 0.332497 Response to peptide hormone (1) 3.0 1.0
651 GO:0050808 0.322913 Synapse organization (1) 3.0 1.0
772 GO:0060485 0.320964 Mesenchyme development (1) 3.0 1.0
406 GO:0048041 0.319689 Focal adhesion assembly (1) 1.0 1.0
698 GO:0070663 0.318299 Regulation of leukocyte proliferation (1) 2.0 1.0
804 GO:0030855 0.317904 Epithelial cell differentiation (1) 3.0 0.0
538 GO:0048266 0.308838 Behavioral response to pain (1) 0.0 1.0
225 GO:0006470 0.305848 Protein dephosphorylation (1) 3.0 1.0
719 GO:0043244 0.305739 Regulation of protein-containing complex disassembly (1) 2.0 1.0
725 GO:0070997 0.305130 Neuron death (1) 2.0 0.0
532 GO:0007596 0.304871 Blood coagulation (1) 3.0 1.0
806 GO:0051056 0.302828 Regulation of small gtpase mediated signal transduction (1) 3.0 1.0
433 GO:0097191 0.302625 Extrinsic apoptotic signaling pathway (1) 3.0 1.0
574 GO:0008285 0.300631 Negative regulation of cell population proliferation (1) 3.0 0.0
49 GO:0001569 0.300115 Branching involved in blood vessel morphogenesis (1) 0.0 1.0
926 GO:0060020 0.297104 Bergmann glial cell differentiation (1) 0.0 1.0
789 GO:0009416 0.295539 Response to light stimulus (1) 2.0 0.0
887 GO:0034504 0.294393 Protein localization to nucleus (1) 2.0 1.0
169 GO:0002764 0.292841 Immune response-regulating signaling pathway (1) 3.0 0.0
890 GO:0071353 0.290912 Cellular response to interleukin-4 (1) 1.0 1.0
505 GO:0007283 0.290337 Spermatogenesis (1) 2.0 1.0
513 GO:0030900 0.289030 Forebrain development (1) 3.0 1.0
241 GO:0006612 0.288796 Protein targeting to membrane (1) 1.0 1.0
644 GO:0071230 0.284292 Cellular response to amino acid stimulus (1) 1.0 1.0
712 GO:0033002 0.280632 Muscle cell proliferation (1) 2.0 1.0
317 GO:0006897 0.277768 Endocytosis (1) 3.0 1.0
230 GO:0030162 0.277751 Regulation of proteolysis (1) 3.0 0.0
607 GO:0042391 0.276380 Regulation of membrane potential (1) 3.0 0.0
689 GO:1905897 0.275961 Regulation of response to endoplasmic reticulum stress (1) 2.0 1.0
105 GO:0045860 0.275729 Positive regulation of protein kinase activity (1) 2.0 0.0
198 GO:0006260 0.275261 Dna replication (1) 3.0 1.0
548 GO:1903829 0.274893 Positive regulation of protein localization (1) 3.0 0.0
934 GO:0051258 0.274092 Protein polymerization (1) 3.0 1.0
378 GO:0031532 0.271109 Actin cytoskeleton reorganization (1) 1.0 1.0
216 GO:0045727 0.270882 Positive regulation of translation (1) 1.0 1.0
642 GO:0034599 0.270088 Cellular response to oxidative stress (1) 3.0 0.0
767 GO:0051146 0.266722 Striated muscle cell differentiation (1) 2.0 0.0
77 GO:0002718 0.262195 Regulation of cytokine production involved in immune response (1) 2.0 1.0
19 GO:0031109 0.261030 Microtubule polymerization or depolymerization (1) 2.0 1.0
584 GO:0040008 0.259597 Regulation of growth (1) 3.0 0.0
937 GO:0051640 0.257672 Organelle localization (1) 3.0 0.0
377 GO:0031032 0.257340 Actomyosin structure organization (1) 2.0 1.0
904 GO:0042113 0.254618 B cell activation (1) 3.0 0.0
133 GO:0043303 0.253768 Mast cell degranulation (1) 1.0 1.0
561 GO:0048511 0.251652 Rhythmic process (1) 3.0 1.0
243 GO:0006606 0.250144 Protein import into nucleus (1) 1.0 1.0
836 GO:1901987 0.249107 Regulation of cell cycle phase transition (1) 3.0 0.0
758 GO:0031099 0.247922 Regeneration (1) 2.0 1.0
739 GO:1902903 0.245529 Regulation of supramolecular fiber organization (1) 3.0 1.0
323 GO:0016236 0.245469 Macroautophagy (1) 3.0 1.0
478 GO:0048568 0.244894 Embryonic organ development (1) 3.0 0.0
103 GO:0042531 0.243583 Positive regulation of tyrosine phosphorylation of stat protein (1) 0.0 1.0
598 GO:0043254 0.240098 Regulation of protein-containing complex assembly (1) 3.0 1.0
864 GO:0030183 0.236405 B cell differentiation (1) 1.0 0.0
301 GO:0032388 0.235842 Positive regulation of intracellular transport (1) 1.0 0.0
692 GO:0010595 0.235403 Positive regulation of endothelial cell migration (1) 2.0 1.0
865 GO:0030217 0.234830 T cell differentiation (1) 3.0 0.0
20 GO:0070507 0.234661 Regulation of microtubule cytoskeleton organization (1) 2.0 1.0
111 GO:0031069 0.234249 Hair follicle morphogenesis (1) 0.0 1.0
363 GO:0051494 0.232679 Negative regulation of cytoskeleton organization (1) 2.0 1.0
226 GO:0035304 0.232643 Regulation of protein dephosphorylation (1) 2.0 1.0
913 GO:0090630 0.231968 Activation of gtpase activity (1) 0.0 1.0
733 GO:0030335 0.231192 Positive regulation of cell migration (1) 3.0 0.0
196 GO:0006357 0.230696 Regulation of transcription by rna polymerase ii (1) 3.0 1.0
524 GO:0007519 0.230417 Skeletal muscle tissue development (1) 2.0 1.0
53 GO:0001570 0.230416 Vasculogenesis (1) 1.0 1.0
26 GO:1901990 0.229708 Regulation of mitotic cell cycle phase transition (1) 2.0 0.0
33 GO:0000423 0.229518 Mitophagy (1) 1.0 1.0
615 GO:0035265 0.227576 Organ growth (1) 2.0 0.0
84 GO:0001822 0.227091 Kidney development (1) 3.0 0.0
151 GO:0006959 0.225795 Humoral immune response (1) 2.0 1.0
244 GO:0042307 0.225164 Positive regulation of protein import into nucleus (1) 0.0 1.0
24 GO:0007346 0.224934 Regulation of mitotic cell cycle (1) 3.0 0.0
162 GO:0060374 0.223666 Mast cell differentiation (1) 0.0 1.0
533 GO:0030168 0.219923 Platelet activation (1) 2.0 1.0
152 GO:0045087 0.218376 Innate immune response (1) 3.0 0.0
523 GO:0007517 0.217930 Muscle organ development (1) 3.0 0.0
138 GO:0002683 0.217495 Negative regulation of immune system process (1) 3.0 0.0
319 GO:0006909 0.217477 Phagocytosis (1) 2.0 1.0
18 GO:0000226 0.216530 Microtubule cytoskeleton organization (1) 3.0 0.0
622 GO:0043086 0.212387 Negative regulation of catalytic activity (1) 3.0 1.0
898 GO:0035924 0.211788 Cellular response to vascular endothelial growth factor stimulus (1) 2.0 1.0
187 GO:0071897 0.211315 Dna biosynthetic process (1) 2.0 0.0
517 GO:0043586 0.209986 Tongue development (1) 1.0 1.0
606 GO:0048638 0.209981 Regulation of developmental growth (1) 2.0 0.0
736 GO:0009617 0.209127 Response to bacterium (1) 3.0 0.0
691 GO:0043542 0.207450 Endothelial cell migration (1) 3.0 0.0
321 GO:0010507 0.205494 Negative regulation of autophagy (1) 1.0 1.0
149 GO:0050853 0.205417 B cell receptor signaling pathway (1) 1.0 0.0
885 GO:1900180 0.205300 Regulation of protein localization to nucleus (1) 1.0 1.0
907 GO:0051881 0.202707 Regulation of mitochondrial membrane potential (1) 1.0 0.0
171 GO:0003014 0.201637 Renal system process (1) 2.0 1.0
211 GO:0031507 0.200984 Heterochromatin assembly (1) 1.0 1.0
64 GO:0071456 0.200564 Cellular response to hypoxia (1) 1.0 0.0
694 GO:0032869 0.197755 Cellular response to insulin stimulus (1) 2.0 0.0
779 GO:0008544 0.196206 Epidermis development (1) 3.0 0.0
634 GO:0061024 0.193065 Membrane organization (1) 2.0 0.0
50 GO:0002040 0.192737 Sprouting angiogenesis (1) 1.0 0.0
610 GO:0031333 0.191284 Negative regulation of protein-containing complex assembly (1) 2.0 1.0
657 GO:0045165 0.191259 Cell fate commitment (1) 3.0 0.0
435 GO:0016055 0.190638 Wnt signaling pathway (1) 2.0 0.0
569 GO:0030307 0.190101 Positive regulation of cell growth (1) 2.0 1.0
200 GO:0006281 0.189187 Dna repair (1) 2.0 0.0
63 GO:0001666 0.188575 Response to hypoxia (1) 2.0 1.0
560 GO:0043473 0.188137 Pigmentation (1) 2.0 1.0
521 GO:0035051 0.187551 Cardiocyte differentiation (1) 2.0 1.0
690 GO:2001020 0.187450 Regulation of response to dna damage stimulus (1) 2.0 0.0
346 GO:0006936 0.187431 Muscle contraction (1) 3.0 1.0
328 GO:0043065 0.184805 Positive regulation of apoptotic process (1) 2.0 0.0
650 GO:0034329 0.183554 Cell junction assembly (1) 2.0 0.0
262 GO:0045429 0.183059 Positive regulation of nitric oxide biosynthetic process (1) 0.0 1.0
117 GO:0060562 0.183024 Epithelial tube morphogenesis (1) 2.0 0.0
583 GO:0032967 0.182852 Positive regulation of collagen biosynthetic process (1) 0.0 1.0
208 GO:0006325 0.182003 Chromatin organization (1) 3.0 0.0
209 GO:0006338 0.181855 Chromatin remodeling (1) 2.0 1.0
882 GO:0032147 0.181843 Activation of protein kinase activity (1) 1.0 0.0
7 GO:0000122 0.181449 Negative regulation of transcription by rna polymerase ii (1) 1.0 0.0
911 GO:0060416 0.180831 Response to growth hormone (1) 1.0 1.0
121 GO:0090050 0.180206 Positive regulation of cell migration involved in sprouting angiogenesis (1) 0.0 1.0
467 GO:0009791 0.179613 Post-embryonic development (1) 1.0 0.0
870 GO:0070527 0.179383 Platelet aggregation (1) 1.0 1.0
781 GO:0008625 0.179216 Extrinsic apoptotic signaling pathway via death domain receptors (1) 1.0 1.0
702 GO:0048146 0.179210 Positive regulation of fibroblast proliferation (1) 0.0 1.0
785 GO:0009266 0.178944 Response to temperature stimulus (1) 2.0 1.0
316 GO:0033157 0.178583 Regulation of intracellular protein transport (1) 1.0 0.0
472 GO:0001553 0.178541 Luteinization (1) 0.0 1.0
174 GO:0010613 0.176998 Positive regulation of cardiac muscle hypertrophy (1) 1.0 1.0
842 GO:0071407 0.176595 Cellular response to organic cyclic compound (1) 3.0 0.0
834 GO:0035195 0.176389 Gene silencing by mirna (1) 1.0 0.0
504 GO:0048709 0.176248 Oligodendrocyte differentiation (1) 2.0 1.0
54 GO:2001214 0.175971 Positive regulation of vasculogenesis (1) 0.0 1.0
600 GO:0010632 0.174413 Regulation of epithelial cell migration (1) 3.0 0.0
682 GO:0007026 0.174040 Negative regulation of microtubule depolymerization (1) 0.0 1.0
276 GO:0016567 0.173194 Protein ubiquitination (1) 3.0 0.0
881 GO:0031929 0.171840 Tor signaling (1) 2.0 1.0
52 GO:0001541 0.171713 Ovarian follicle development (1) 1.0 0.0
310 GO:0051924 0.171261 Regulation of calcium ion transport (1) 3.0 0.0
899 GO:0035994 0.170901 Response to muscle stretch (1) 1.0 1.0
32 GO:0000422 0.170612 Autophagy of mitochondrion (1) 2.0 1.0
703 GO:0048661 0.169791 Positive regulation of smooth muscle cell proliferation (1) 1.0 0.0
447 GO:0007173 0.169218 Epidermal growth factor receptor signaling pathway (1) 2.0 0.0
# Number of GO terms predicted as 1 for the selected drug.
int((probabilities_df["predictions"] == 1).sum())
288
# Number of GO terms predicted as 0 for the selected drug.
int((probabilities_df["predictions"] == 0).sum())
651

A probability below 0.5 does not necessarily mean that the GO term does not belong to the class: a probability of, for example, 0.2 can still correspond to a prediction of 1 (annotated to the MoA).

Modify probabilities

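Take into account the number of annotations each GO term has (general GO terms are easier to predict because they have more annotations). To do so, the prior log-odds of each GO term (derived from its annotation frequency) are subtracted from the log-odds of its predicted probability.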

For drug with unknown MOA…

# Prior log-odds per row of `slim_matrix_single_neuron`: the row-wise sum
# divided by the number of columns (presumably the fraction of drugs annotated
# to each GO term - assumes a 0/1 annotation matrix, TODO confirm).
sum_annotations = slim_matrix_single_neuron.sum(axis=1) / slim_matrix_single_neuron.shape[1]
logits_apriori = np.log(sum_annotations / (1 - sum_annotations))

# Posterior log-odds from the predicted probabilities of the selected drug.
# NOTE(review): a probability (or prior) of exactly 0 or 1 yields +/-inf here.
probability_u = probabilities_df["probability"]
logits_apost = np.log(probability_u / (1 - probability_u))

# The subtraction is positional (plain NumPy arrays, no label alignment).
# NOTE(review): this assumes the rows of `probabilities_df` are in the same
# order as the rows of `slim_matrix_single_neuron` - verify.
delta_logits = logits_apost.to_numpy() - logits_apriori.to_numpy()
delta_logits_df = pd.DataFrame({"delta_logits": delta_logits})
probabilities_mod = probabilities_df.merge(delta_logits_df, left_index=True, right_index=True)

# Show GO terms predicted as 1 with layer_number <= 7, largest gain over the
# prior first.
mask_predicted_u = probabilities_mod["predictions"] == 1
mask_layer_u = probabilities_mod["layer_number"] <= 7
probabilities_mod.loc[mask_predicted_u & mask_layer_u].sort_values(by=["delta_logits"], ascending=False)
GO_term probability Name layer_number predictions delta_logits
839 GO:0031047 0.553888 Gene silencing by rna (1) 2.0 1.0 2.192458
662 GO:0031648 0.377411 Protein destabilization (1) 0.0 1.0 2.092834
106 GO:0071900 0.725062 Regulation of protein serine/threonine kinase activity (1) 2.0 1.0 1.944569
33 GO:0000423 0.229518 Mitophagy (1) 1.0 1.0 1.880007
538 GO:0048266 0.308838 Behavioral response to pain (1) 0.0 1.0 1.857031
913 GO:0090630 0.231968 Activation of gtpase activity (1) 0.0 1.0 1.793943
216 GO:0045727 0.270882 Positive regulation of translation (1) 1.0 1.0 1.746070
223 GO:0018108 0.783375 Peptidyl-tyrosine phosphorylation (1) 3.0 1.0 1.672826
719 GO:0043244 0.305739 Regulation of protein-containing complex disassembly (1) 2.0 1.0 1.646098
241 GO:0006612 0.288796 Protein targeting to membrane (1) 1.0 1.0 1.626841
104 GO:0006469 0.364467 Negative regulation of protein kinase activity (1) 2.0 1.0 1.593794
888 GO:0034976 0.540483 Response to endoplasmic reticulum stress (1) 3.0 1.0 1.575980
890 GO:0071353 0.290912 Cellular response to interleukin-4 (1) 1.0 1.0 1.575256
638 GO:0033554 0.884331 Cellular response to stress (1) 4.0 1.0 1.573975
820 GO:0033993 0.809556 Response to lipid (1) 3.0 1.0 1.551573
74 GO:0001817 0.687600 Regulation of cytokine production (1) 3.0 1.0 1.534715
682 GO:0007026 0.174040 Negative regulation of microtubule depolymerization (1) 0.0 1.0 1.533781
528 GO:0007565 0.406022 Female pregnancy (1) 2.0 1.0 1.478881
224 GO:0046777 0.661031 Protein autophosphorylation (1) 1.0 1.0 1.453821
544 GO:0060179 0.161090 Male mating behavior (1) 0.0 1.0 1.440901
32 GO:0000422 0.170612 Autophagy of mitochondrion (1) 2.0 1.0 1.409881
774 GO:0030216 0.342203 Keratinocyte differentiation (1) 2.0 1.0 1.406530
44 GO:0048812 0.672129 Neuron projection morphogenesis (1) 3.0 1.0 1.385004
472 GO:0001553 0.178541 Luteinization (1) 0.0 1.0 1.373326
77 GO:0002718 0.262195 Regulation of cytokine production involved in immune response (1) 2.0 1.0 1.372827
900 GO:0042060 0.612497 Wound healing (1) 4.0 1.0 1.325911
385 GO:0060632 0.143694 Regulation of microtubule-based movement (1) 1.0 1.0 1.306104
726 GO:0065003 0.654123 Protein-containing complex assembly (1) 4.0 1.0 1.304384
926 GO:0060020 0.297104 Bergmann glial cell differentiation (1) 0.0 1.0 1.288696
582 GO:1902459 0.140358 Positive regulation of stem cell population maintenance (1) 0.0 1.0 1.278722
63 GO:0001666 0.188575 Response to hypoxia (1) 2.0 1.0 1.276928
342 GO:2001243 0.411574 Negative regulation of intrinsic apoptotic signaling pathway (1) 2.0 1.0 1.262448
573 GO:0010629 0.742615 Negative regulation of gene expression (1) 3.0 1.0 1.216446
480 GO:0048565 0.333289 Digestive tract development (1) 1.0 1.0 1.203775
174 GO:0010613 0.176998 Positive regulation of cardiac muscle hypertrophy (1) 1.0 1.0 1.199403
676 GO:0030282 0.116755 Bone mineralization (1) 1.0 1.0 1.177409
49 GO:0001569 0.300115 Branching involved in blood vessel morphogenesis (1) 0.0 1.0 1.170620
899 GO:0035994 0.170901 Response to muscle stretch (1) 1.0 1.0 1.156962
443 GO:0035860 0.114067 Glial cell-derived neurotrophic factor receptor signaling pathway (1) 0.0 1.0 1.151083
847 GO:0045055 0.414665 Regulated exocytosis (1) 2.0 1.0 1.096844
134 GO:0002366 0.423235 Leukocyte activation involved in immune response (1) 3.0 1.0 1.076788
181 GO:0006139 0.875555 Nucleobase-containing compound metabolic process (1) 6.0 1.0 1.061941
689 GO:1905897 0.275961 Regulation of response to endoplasmic reticulum stress (1) 2.0 1.0 1.052784
38 GO:0000902 0.813929 Cell morphogenesis (1) 4.0 1.0 1.052132
654 GO:0090398 0.416328 Cellular senescence (1) 1.0 1.0 1.048427
693 GO:1904646 0.344997 Cellular response to amyloid-beta (1) 0.0 1.0 1.043235
100 GO:0033138 0.542770 Positive regulation of peptidyl-serine phosphorylation (1) 1.0 1.0 1.018799
745 GO:0009653 0.877370 Anatomical structure morphogenesis (1) 5.0 1.0 1.014649
198 GO:0006260 0.275261 Dna replication (1) 3.0 1.0 1.007970
133 GO:0043303 0.253768 Mast cell degranulation (1) 1.0 1.0 0.981406
368 GO:0060271 0.126831 Cilium assembly (1) 3.0 1.0 0.970313
285 GO:0051051 0.421500 Negative regulation of transport (1) 3.0 1.0 0.964316
626 GO:0051649 0.802013 Establishment of localization in cell (1) 4.0 1.0 0.957089
378 GO:0031532 0.271109 Actin cytoskeleton reorganization (1) 1.0 1.0 0.947004
651 GO:0050808 0.322913 Synapse organization (1) 3.0 1.0 0.943923
11 GO:0043406 0.445845 Positive regulation of map kinase activity (1) 1.0 1.0 0.939978
34 GO:1903146 0.103490 Regulation of autophagy of mitochondrion (1) 1.0 1.0 0.932003
262 GO:0045429 0.183059 Positive regulation of nitric oxide biosynthetic process (1) 0.0 1.0 0.911660
661 GO:0046326 0.165116 Positive regulation of glucose import (1) 0.0 1.0 0.907432
911 GO:0060416 0.180831 Response to growth hormone (1) 1.0 1.0 0.896690
571 GO:2000010 0.127151 Positive regulation of protein localization to cell surface (1) 0.0 1.0 0.888565
633 GO:0051301 0.535293 Cell division (1) 2.0 1.0 0.887200
483 GO:0035909 0.162013 Aorta morphogenesis (1) 1.0 1.0 0.884752
54 GO:2001214 0.175971 Positive regulation of vasculogenesis (1) 0.0 1.0 0.863534
416 GO:0035556 0.887824 Intracellular signal transduction (1) 4.0 1.0 0.862843
470 GO:0042733 0.097137 Embryonic digit morphogenesis (1) 0.0 1.0 0.861598
886 GO:0034502 0.132453 Protein localization to chromosome (1) 2.0 1.0 0.856780
111 GO:0031069 0.234249 Hair follicle morphogenesis (1) 0.0 1.0 0.832897
880 GO:0031667 0.374380 Response to nutrient levels (1) 4.0 1.0 0.819334
833 GO:0010467 0.880677 Gene expression (1) 5.0 1.0 0.817357
184 GO:0006275 0.102018 Regulation of dna replication (1) 2.0 1.0 0.816174
162 GO:0060374 0.223666 Mast cell differentiation (1) 0.0 1.0 0.815593
265 GO:0051247 0.818839 Positive regulation of protein metabolic process (1) 4.0 1.0 0.802282
821 GO:0034097 0.533600 Response to cytokine (1) 3.0 1.0 0.801775
408 GO:0007165 0.930074 Signal transduction (1) 6.0 1.0 0.801131
849 GO:0043966 0.142338 Histone h3 acetylation (1) 2.0 1.0 0.797379
112 GO:0060789 0.091132 Hair follicle placode formation (1) 0.0 1.0 0.791156
558 GO:0016032 0.500000 Viral process (1) 3.0 1.0 0.785929
151 GO:0006959 0.225795 Humoral immune response (1) 2.0 1.0 0.785159
86 GO:0072006 0.358531 Nephron development (1) 2.0 1.0 0.777591
83 GO:0002720 0.163684 Positive regulation of cytokine production involved in immune response (1) 1.0 1.0 0.776348
53 GO:0001570 0.230416 Vasculogenesis (1) 1.0 1.0 0.770097
532 GO:0007596 0.304871 Blood coagulation (1) 3.0 1.0 0.764501
567 GO:0051641 0.841801 Cellular localization (1) 5.0 1.0 0.761481
639 GO:0060326 0.341919 Cell chemotaxis (1) 2.0 1.0 0.758938
19 GO:0031109 0.261030 Microtubule polymerization or depolymerization (1) 2.0 1.0 0.746075
800 GO:0030521 0.102431 Androgen receptor signaling pathway (1) 1.0 1.0 0.729092
511 GO:0030182 0.721830 Neuron differentiation (1) 5.0 1.0 0.726501
806 GO:0051056 0.302828 Regulation of small gtpase mediated signal transduction (1) 3.0 1.0 0.724275
527 GO:0007528 0.109807 Neuromuscular junction development (1) 1.0 1.0 0.722229
794 GO:0043434 0.332497 Response to peptide hormone (1) 3.0 1.0 0.716778
99 GO:0001934 0.658374 Positive regulation of protein phosphorylation (1) 3.0 1.0 0.708241
896 GO:0097193 0.400210 Intrinsic apoptotic signaling pathway (1) 3.0 1.0 0.705651
624 GO:0010628 0.702924 Positive regulation of gene expression (1) 3.0 1.0 0.704419
722 GO:0042325 0.780828 Regulation of phosphorylation (1) 5.0 1.0 0.698909
752 GO:0043170 0.942545 Macromolecule metabolic process (1) 7.0 1.0 0.693446
517 GO:0043586 0.209986 Tongue development (1) 1.0 1.0 0.692360
182 GO:0016070 0.778891 Rna metabolic process (1) 5.0 1.0 0.687621
281 GO:0006811 0.500000 Ion transport (1) 6.0 1.0 0.686632
586 GO:2000773 0.121640 Negative regulation of cellular senescence (1) 0.0 1.0 0.685597
808 GO:1902532 0.449154 Negative regulation of intracellular signal transduction (1) 3.0 1.0 0.684968
570 GO:0045597 0.628072 Positive regulation of cell differentiation (1) 3.0 1.0 0.680798
76 GO:0001819 0.388417 Positive regulation of cytokine production (1) 2.0 1.0 0.679733
934 GO:0051258 0.274092 Protein polymerization (1) 3.0 1.0 0.677814
713 GO:0035726 0.096936 Common myeloid progenitor cell proliferation (1) 0.0 1.0 0.667842
386 GO:0007049 0.727375 Cell cycle (1) 6.0 1.0 0.665704
473 GO:0008584 0.350294 Male gonad development (1) 1.0 1.0 0.663186
813 GO:0051898 0.103239 Negative regulation of protein kinase b signaling (1) 0.0 1.0 0.653209
698 GO:0070663 0.318299 Regulation of leukocyte proliferation (1) 2.0 1.0 0.652091
729 GO:0120035 0.514954 Regulation of plasma membrane bounded cell projection organization (1) 3.0 1.0 0.631426
881 GO:0031929 0.171840 Tor signaling (1) 2.0 1.0 0.624585
496 GO:0048608 0.406699 Reproductive structure development (1) 2.0 1.0 0.619202
559 GO:0022414 0.641754 Reproductive process (1) 4.0 1.0 0.617772
562 GO:0050896 0.963328 Response to stimulus (1) 7.0 1.0 0.605804
381 GO:0008064 0.113103 Regulation of actin polymerization or depolymerization (1) 2.0 1.0 0.603158
855 GO:1903578 0.141298 Regulation of atp metabolic process (1) 1.0 1.0 0.602865
619 GO:0050790 0.825081 Regulation of catalytic activity (1) 4.0 1.0 0.598054
476 GO:0048714 0.112165 Positive regulation of oligodendrocyte differentiation (1) 0.0 1.0 0.593772
363 GO:0051494 0.232679 Negative regulation of cytoskeleton organization (1) 2.0 1.0 0.593448
652 GO:0042180 0.090402 Cellular ketone metabolic process (1) 3.0 1.0 0.590857
423 GO:1902533 0.530199 Positive regulation of intracellular signal transduction (1) 2.0 1.0 0.581066
319 GO:0006909 0.217477 Phagocytosis (1) 2.0 1.0 0.578885
353 GO:0006954 0.482633 Inflammatory response (1) 3.0 1.0 0.578336
406 GO:0048041 0.319689 Focal adhesion assembly (1) 1.0 1.0 0.577606
585 GO:0048589 0.599054 Developmental growth (1) 4.0 1.0 0.575879
461 GO:0050804 0.367884 Modulation of chemical synaptic transmission (1) 3.0 1.0 0.568933
711 GO:0019752 0.316625 Carboxylic acid metabolic process (1) 4.0 1.0 0.563481
629 GO:0051174 0.783163 Regulation of phosphorus metabolic process (1) 6.0 1.0 0.558257
82 GO:0032743 0.079738 Positive regulation of interleukin-2 production (1) 0.0 1.0 0.545266
510 GO:0042063 0.408279 Gliogenesis (1) 3.0 1.0 0.539129
730 GO:0031175 0.631148 Neuron projection development (1) 4.0 1.0 0.537147
741 GO:0016477 0.682158 Cell migration (1) 4.0 1.0 0.536651
663 GO:0050821 0.165950 Protein stabilization (1) 0.0 1.0 0.535214
343 GO:1902166 0.145254 Negative regulation of intrinsic apoptotic signaling pathway in response to dna damage by p53 class mediator (1) 0.0 1.0 0.525494
213 GO:0006396 0.268052 Rna processing (1) 4.0 1.0 0.523646
563 GO:1900272 0.063979 Negative regulation of long-term synaptic potentiation (1) 0.0 1.0 0.517858
36 GO:0000723 0.137533 Telomere maintenance (1) 1.0 1.0 0.515446
524 GO:0007519 0.230417 Skeletal muscle tissue development (1) 2.0 1.0 0.511692
93 GO:0001843 0.082584 Neural tube closure (1) 1.0 1.0 0.491845
226 GO:0035304 0.232643 Regulation of protein dephosphorylation (1) 2.0 1.0 0.490892
684 GO:0051770 0.158964 Positive regulation of nitric-oxide synthase biosynthetic process (1) 0.0 1.0 0.483868
583 GO:0032967 0.182852 Positive regulation of collagen biosynthetic process (1) 0.0 1.0 0.478922
641 GO:0071417 0.494543 Cellular response to organonitrogen compound (1) 3.0 1.0 0.475108
788 GO:0009410 0.407937 Response to xenobiotic stimulus (1) 2.0 1.0 0.474799
412 GO:0009966 0.816584 Regulation of signal transduction (1) 5.0 1.0 0.474348
735 GO:0046718 0.151222 Viral entry into host cell (1) 1.0 1.0 0.472175
266 GO:0030163 0.372430 Protein catabolic process (1) 4.0 1.0 0.453053
196 GO:0006357 0.230696 Regulation of transcription by rna polymerase ii (1) 3.0 1.0 0.447389
840 GO:0043154 0.084113 Negative regulation of cysteine-type endopeptidase activity involved in apoptotic process (1) 1.0 1.0 0.427212
299 GO:0030705 0.133167 Cytoskeleton-dependent intracellular transport (1) 3.0 1.0 0.424567
433 GO:0097191 0.302625 Extrinsic apoptotic signaling pathway (1) 3.0 1.0 0.420731
610 GO:0031333 0.191284 Negative regulation of protein-containing complex assembly (1) 2.0 1.0 0.417629
501 GO:0007507 0.480118 Heart development (1) 4.0 1.0 0.417365
901 GO:0042110 0.432222 T cell activation (1) 4.0 1.0 0.413843
260 GO:0006807 0.931522 Nitrogen compound metabolic process (1) 7.0 1.0 0.413085
785 GO:0009266 0.178944 Response to temperature stimulus (1) 2.0 1.0 0.412491
358 GO:0007005 0.427023 Mitochondrion organization (1) 3.0 1.0 0.412213
377 GO:0031032 0.257340 Actomyosin structure organization (1) 2.0 1.0 0.410068
484 GO:0007399 0.731066 Nervous system development (1) 6.0 1.0 0.409544
136 GO:0002376 0.760395 Immune system process (1) 6.0 1.0 0.409057
211 GO:0031507 0.200984 Heterochromatin assembly (1) 1.0 1.0 0.406540
20 GO:0070507 0.234661 Regulation of microtubule cytoskeleton organization (1) 2.0 1.0 0.406533
356 GO:0006996 0.787638 Organelle organization (1) 5.0 1.0 0.400534
376 GO:0007015 0.344878 Actin filament organization (1) 3.0 1.0 0.399824
173 GO:0003300 0.147997 Cardiac muscle hypertrophy (1) 2.0 1.0 0.399424
620 GO:0051098 0.363823 Regulation of binding (1) 3.0 1.0 0.394294
922 GO:0036324 0.117627 Vascular endothelial growth factor receptor-2 signaling pathway (1) 0.0 1.0 0.392320
487 GO:0030325 0.087339 Adrenal gland development (1) 0.0 1.0 0.389651
781 GO:0008625 0.179216 Extrinsic apoptotic signaling pathway via death domain receptors (1) 1.0 1.0 0.375455
608 GO:0043114 0.067682 Regulation of vascular permeability (1) 1.0 1.0 0.368329
103 GO:0042531 0.243583 Positive regulation of tyrosine phosphorylation of stat protein (1) 0.0 1.0 0.365635
102 GO:0050731 0.126350 Positive regulation of peptidyl-tyrosine phosphorylation (1) 2.0 1.0 0.364186
504 GO:0048709 0.176248 Oligodendrocyte differentiation (1) 2.0 1.0 0.355145
324 GO:0016241 0.118988 Regulation of macroautophagy (1) 2.0 1.0 0.349328
218 GO:0006468 0.771378 Protein phosphorylation (1) 5.0 1.0 0.348022
171 GO:0003014 0.201637 Renal system process (1) 2.0 1.0 0.341557
644 GO:0071230 0.284292 Cellular response to amino acid stimulus (1) 1.0 1.0 0.332289
321 GO:0010507 0.205494 Negative regulation of autophagy (1) 1.0 1.0 0.332037
533 GO:0030168 0.219923 Platelet activation (1) 2.0 1.0 0.322598
546 GO:0008104 0.599923 Protein localization (1) 5.0 1.0 0.318133
362 GO:0033043 0.644550 Regulation of organelle organization (1) 4.0 1.0 0.315090
851 GO:0070933 0.058559 Histone h4 deacetylation (1) 0.0 1.0 0.313671
460 GO:0023061 0.159325 Signal release (1) 4.0 1.0 0.312808
438 GO:0007179 0.169016 Transforming growth factor beta receptor signaling pathway (1) 1.0 1.0 0.304505
300 GO:0032386 0.384834 Regulation of intracellular transport (1) 2.0 1.0 0.296704
920 GO:0036092 0.063107 Phosphatidylinositol-3-phosphate biosynthetic process (1) 0.0 1.0 0.293434
505 GO:0007283 0.290337 Spermatogenesis (1) 2.0 1.0 0.287750
357 GO:0006997 0.062583 Nucleus organization (1) 2.0 1.0 0.284535
748 GO:0009056 0.627534 Catabolic process (1) 5.0 1.0 0.276960
48 GO:0001525 0.386728 Angiogenesis (1) 2.0 1.0 0.264850
541 GO:0008542 0.131766 Visual learning (1) 0.0 1.0 0.264386
98 GO:0001932 0.669289 Regulation of protein phosphorylation (1) 4.0 1.0 0.263137
734 GO:0051702 0.152779 Biological process involved in interaction with symbiont (1) 2.0 1.0 0.263093
243 GO:0006606 0.250144 Protein import into nucleus (1) 1.0 1.0 0.261493
513 GO:0030900 0.289030 Forebrain development (1) 3.0 1.0 0.257354
673 GO:0043392 0.109472 Negative regulation of dna binding (1) 1.0 1.0 0.255228
346 GO:0006936 0.187431 Muscle contraction (1) 3.0 1.0 0.250863
552 GO:0033365 0.343027 Protein localization to organelle (1) 3.0 1.0 0.239223
9 GO:0043408 0.476159 Regulation of mapk cascade (1) 2.0 1.0 0.238056
887 GO:0034504 0.294393 Protein localization to nucleus (1) 2.0 1.0 0.236099
598 GO:0043254 0.240098 Regulation of protein-containing complex assembly (1) 3.0 1.0 0.234151
280 GO:0006810 0.773180 Transport (1) 7.0 1.0 0.229528
267 GO:0045732 0.069485 Positive regulation of protein catabolic process (1) 2.0 1.0 0.220326
668 GO:0010976 0.084952 Positive regulation of neuron projection development (1) 1.0 1.0 0.216502
212 GO:0051090 0.131395 Regulation of dna-binding transcription factor activity (1) 2.0 1.0 0.215454
799 GO:0009743 0.121060 Response to carbohydrate (1) 2.0 1.0 0.214795
675 GO:0071277 0.074147 Cellular response to calcium ion (1) 0.0 1.0 0.211553
495 GO:0060976 0.094845 Coronary vasculature development (1) 1.0 1.0 0.210353
803 GO:0042475 0.125550 Odontogenesis of dentin-containing tooth (1) 2.0 1.0 0.208932
671 GO:0032092 0.120001 Positive regulation of protein binding (1) 1.0 1.0 0.204806
772 GO:0060485 0.320964 Mesenchyme development (1) 3.0 1.0 0.203758
121 GO:0090050 0.180206 Positive regulation of cell migration involved in sprouting angiogenesis (1) 0.0 1.0 0.202697
801 GO:0033143 0.078487 Regulation of intracellular steroid hormone receptor signaling pathway (1) 1.0 1.0 0.199503
870 GO:0070527 0.179383 Platelet aggregation (1) 1.0 1.0 0.197119
314 GO:0070588 0.238228 Calcium ion transmembrane transport (1) 4.0 1.0 0.196919
656 GO:0030154 0.829241 Cell differentiation (1) 6.0 1.0 0.193962
645 GO:0071300 0.128940 Cellular response to retinoic acid (1) 0.0 1.0 0.193772
897 GO:0035767 0.138830 Endothelial cell chemotaxis (1) 1.0 1.0 0.192330
332 GO:0097190 0.410188 Apoptotic signaling pathway (1) 4.0 1.0 0.189603
643 GO:0071222 0.143456 Cellular response to lipopolysaccharide (1) 2.0 1.0 0.189188
898 GO:0035924 0.211788 Cellular response to vascular endothelial growth factor stimulus (1) 2.0 1.0 0.184593
459 GO:0007267 0.506882 Cell-cell signaling (1) 5.0 1.0 0.184371
323 GO:0016236 0.245469 Macroautophagy (1) 3.0 1.0 0.183750
317 GO:0006897 0.277768 Endocytosis (1) 3.0 1.0 0.178141
244 GO:0042307 0.225164 Positive regulation of protein import into nucleus (1) 0.0 1.0 0.177873
728 GO:0030032 0.140303 Lamellipodium assembly (1) 1.0 1.0 0.163285
640 GO:0071310 0.661841 Cellular response to organic substance (1) 4.0 1.0 0.156044
686 GO:0097009 0.055448 Energy homeostasis (1) 0.0 1.0 0.155920
692 GO:0010595 0.235403 Positive regulation of endothelial cell migration (1) 2.0 1.0 0.154755
309 GO:0034765 0.277608 Regulation of ion transmembrane transport (1) 4.0 1.0 0.153881
770 GO:0009887 0.516411 Animal organ morphogenesis (1) 4.0 1.0 0.152678
209 GO:0006338 0.181855 Chromatin remodeling (1) 2.0 1.0 0.147943
547 GO:0032880 0.479479 Regulation of protein localization (1) 4.0 1.0 0.144928
918 GO:0046854 0.054745 Phosphatidylinositol phosphate biosynthetic process (1) 1.0 1.0 0.142413
329 GO:0043066 0.610305 Negative regulation of apoptotic process (1) 4.0 1.0 0.132954
824 GO:0071363 0.407544 Cellular response to growth factor stimulus (1) 3.0 1.0 0.122808
603 GO:0061045 0.068278 Negative regulation of wound healing (1) 2.0 1.0 0.122775
225 GO:0006470 0.305848 Protein dephosphorylation (1) 3.0 1.0 0.111956
388 GO:0051726 0.483775 Regulation of cell cycle (1) 5.0 1.0 0.109432
516 GO:0007423 0.372363 Sensory organ development (1) 3.0 1.0 0.106514
261 GO:0051171 0.836045 Regulation of nitrogen compound metabolic process (1) 6.0 1.0 0.100916
375 GO:0051496 0.076120 Positive regulation of stress fiber assembly (1) 0.0 1.0 0.097110
758 GO:0031099 0.247922 Regeneration (1) 2.0 1.0 0.096133
55 GO:0001649 0.136472 Osteoblast differentiation (1) 1.0 1.0 0.091104
227 GO:0032516 0.155024 Positive regulation of phosphoprotein phosphatase activity (1) 0.0 1.0 0.090968
191 GO:0045944 0.420524 Positive regulation of transcription by rna polymerase ii (1) 2.0 1.0 0.084844
283 GO:0051049 0.611523 Regulation of transport (1) 5.0 1.0 0.084359
739 GO:1902903 0.245529 Regulation of supramolecular fiber organization (1) 3.0 1.0 0.083258
569 GO:0030307 0.190101 Positive regulation of cell growth (1) 2.0 1.0 0.078819
679 GO:0042310 0.060677 Vasoconstriction (1) 1.0 1.0 0.075349
405 GO:0007159 0.162103 Leukocyte cell-cell adhesion (1) 3.0 1.0 0.074992
566 GO:0032879 0.660318 Regulation of localization (1) 6.0 1.0 0.074219
568 GO:0008284 0.513916 Positive regulation of cell population proliferation (1) 2.0 1.0 0.073069
561 GO:0048511 0.251652 Rhythmic process (1) 3.0 1.0 0.067632
702 GO:0048146 0.179210 Positive regulation of fibroblast proliferation (1) 0.0 1.0 0.067004
403 GO:0033628 0.059919 Regulation of cell adhesion mediated by integrin (1) 1.0 1.0 0.061975
885 GO:1900180 0.205300 Regulation of protein localization to nucleus (1) 1.0 1.0 0.060202
228 GO:0006508 0.347532 Proteolysis (1) 4.0 1.0 0.056727
560 GO:0043473 0.188137 Pigmentation (1) 2.0 1.0 0.036612
777 GO:0050680 0.103322 Negative regulation of epithelial cell proliferation (1) 2.0 1.0 0.036383
751 GO:0046034 0.094236 Atp metabolic process (1) 2.0 1.0 0.034832
591 GO:0010941 0.705559 Regulation of cell death (1) 5.0 1.0 0.026616
818 GO:0010243 0.557617 Response to organonitrogen compound (1) 4.0 1.0 0.022040
724 GO:0036473 0.154959 Cell death in response to oxidative stress (1) 2.0 1.0 0.021429
153 GO:0050776 0.424435 Regulation of immune response (1) 4.0 1.0 0.011047
521 GO:0035051 0.187551 Cardiocyte differentiation (1) 2.0 1.0 0.003905
194 GO:0006355 0.544373 Regulation of transcription, dna-templated (1) 4.0 1.0 0.003607
622 GO:0043086 0.212387 Negative regulation of catalytic activity (1) 3.0 1.0 -0.003920
494 GO:0060840 0.094489 Artery development (1) 2.0 1.0 -0.013518
936 GO:0051000 0.059868 Positive regulation of nitric-oxide synthase activity (1) 0.0 1.0 -0.017649
382 GO:0030041 0.093571 Actin filament polymerization (1) 2.0 1.0 -0.024295
712 GO:0033002 0.280632 Muscle cell proliferation (1) 2.0 1.0 -0.031117
750 GO:0044281 0.305872 Small molecule metabolic process (1) 5.0 1.0 -0.053702
257 GO:0046488 0.135113 Phosphatidylinositol metabolic process (1) 2.0 1.0 -0.069795
731 GO:0031529 0.036028 Ruffle organization (1) 1.0 1.0 -0.085835
469 GO:0060173 0.072132 Limb development (1) 1.0 1.0 -0.088184
296 GO:0015031 0.292221 Protein transport (1) 4.0 1.0 -0.098692
927 GO:0042632 0.034870 Cholesterol homeostasis (1) 0.0 1.0 -0.119705
78 GO:0032760 0.100402 Positive regulation of tumor necrosis factor production (1) 0.0 1.0 -0.132748
856 GO:0019722 0.034146 Calcium-mediated signaling (1) 2.0 1.0 -0.141431
288 GO:0032940 0.217398 Secretion by cell (1) 5.0 1.0 -0.147194
705 GO:0051353 0.082787 Positive regulation of oxidoreductase activity (1) 1.0 1.0 -0.158575
176 GO:0044262 0.129234 Cellular carbohydrate metabolic process (1) 3.0 1.0 -0.190099
180 GO:0019318 0.046251 Hexose metabolic process (1) 2.0 1.0 -0.211363
440 GO:0030512 0.045444 Negative regulation of transforming growth factor beta receptor signaling pathway (1) 0.0 1.0 -0.229806
331 GO:0071887 0.072964 Leukocyte apoptotic process (1) 2.0 1.0 -0.295526
823 GO:0045471 0.068117 Response to ethanol (1) 1.0 1.0 -0.639920
232 GO:0010951 0.025246 Negative regulation of endopeptidase activity (1) 2.0 1.0 -0.917276
# Top 30 GO terms predicted as positive (predictions == 1) in layers <= 7,
# ranked by decreasing delta_logits. The filter and the sort are evaluated
# once so that the three lists below are row-aligned by construction.
top_positive_terms = (
    probabilities_mod.loc[
        (probabilities_mod["predictions"] == 1)
        & (probabilities_mod["layer_number"] <= 7)
    ]
    .sort_values(by=["delta_logits"], ascending=False)
    .head(30)
)

# Drop the last 4 characters of every name (the " (1)" suffix shown in the table).
names2 = [name[:-4] for name in top_positive_terms["Name"]]
terms2 = list(top_positive_terms["GO_term"])
logits2 = list(top_positive_terms["delta_logits"])

for term, name, logit in zip(terms2, names2, logits2):
    print(term, name, logit)
GO:0031047 Gene silencing by rna 2.192457619336144
GO:0031648 Protein destabilization 2.092833916210919
GO:0071900 Regulation of protein serine/threonine kinase activity 1.944569179670069
GO:0000423 Mitophagy 1.8800065969407627
GO:0048266 Behavioral response to pain 1.8570307139212263
GO:0090630 Activation of gtpase activity 1.793943480641404
GO:0045727 Positive regulation of translation 1.7460704310285706
GO:0018108 Peptidyl-tyrosine phosphorylation 1.6728257190053135
GO:0043244 Regulation of protein-containing complex disassembly 1.6460980615310405
GO:0006612 Protein targeting to membrane 1.6268405228374492
GO:0006469 Negative regulation of protein kinase activity 1.5937943848967007
GO:0034976 Response to endoplasmic reticulum stress 1.575980420814227
GO:0071353 Cellular response to interleukin-4 1.5752555355925209
GO:0033554 Cellular response to stress 1.573975479674923
GO:0033993 Response to lipid 1.5515734212063519
GO:0001817 Regulation of cytokine production 1.5347154822720621
GO:0007026 Negative regulation of microtubule depolymerization 1.5337813437727923
GO:0007565 Female pregnancy 1.4788810435976616
GO:0046777 Protein autophosphorylation 1.453821209026271
GO:0060179 Male mating behavior 1.4409006464912832
GO:0000422 Autophagy of mitochondrion 1.4098807478143418
GO:0030216 Keratinocyte differentiation 1.406529668352229
GO:0048812 Neuron projection morphogenesis 1.3850039160074457
GO:0001553 Luteinization 1.3733255324441331
GO:0002718 Regulation of cytokine production involved in immune response 1.3728274508963876
GO:0042060 Wound healing 1.325910525701464
GO:0060632 Regulation of microtubule-based movement 1.3061039503079361
GO:0065003 Protein-containing complex assembly 1.304383769510548
GO:0060020 Bergmann glial cell differentiation 1.2886961413918279
GO:1902459 Positive regulation of stem cell population maintenance 1.2787216247479065
# import libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

# set font
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = 'Roboto'

# set the style of the axes and the text color
plt.rcParams['axes.edgecolor']='#333F4B'
plt.rcParams['axes.linewidth']=0.8
plt.rcParams['xtick.color']='#333F4B'
plt.rcParams['ytick.color']='#333F4B'
plt.rcParams['text.color']='#333F4B'


# create some fake data
percentages = pd.Series(logits2, 
                        index=names2)
df = pd.DataFrame({'percentage' : percentages})
df = df.sort_values(by='percentage')

# we first need a numeric placeholder for the y axis
my_range=list(range(1,len(df.index)+1))

fig, ax = plt.subplots(figsize=(4,17))

# create for each expense type an horizontal line that starts at x = 0 with the length 
# represented by the specific expense percentage value.
plt.hlines(y=my_range, xmin=0, xmax=df['percentage'], color='#208EA3', alpha=0.2, linewidth=14)

# create for each expense type a dot at the level of the expense percentage value
plt.plot(df['percentage'], my_range, "o", markersize=14, color='#208EA3', alpha=0.8)

# set labels
ax.set_xlabel(' Δlogit', fontsize=25, fontweight='black', color = '#36382E')
ax.set_ylabel('')
ax.set_facecolor(color="white")
ax.set_alpha(1)

# set axis
ax.tick_params(axis='both', which='major', labelsize=30)
plt.yticks(my_range, df.index)

# add an horizonal label for the y axis 
fig.text(-0.58, 0.862, 'MoA (GO terms)', fontsize=27, fontweight='black', color = '#36382E')
fig.text(0.2, 0.9, selected_drug_u_name.capitalize(), fontsize=30, fontweight='black', color = '#36382E')


# change the style of the axis spines
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

ax.spines['left'].set_bounds((1, len(my_range)))
ax.set_xlim(0,max(logits2)+0.1)

ax.spines['left'].set_position(('outward', 8))
ax.spines['bottom'].set_position(('outward', 5))

plt.savefig(resultsdir+selected_drug_u_name+'_top_terms.png', dpi=300, bbox_inches='tight')

For known drug…

# Let the user pick a drug and read the selection back from the widget.
display(combobox)
selected_drug_name = combobox.result

# NOTE: these are the TEST logits!
# One row per GO term with the selected drug's column of platt_matrix,
# joined with the GO-term metadata in real_go_info_svm.
train_drug_logs = pd.DataFrame(platt_matrix.loc[:, selected_drug_name])
train_drug_logs = train_drug_logs.reset_index()
train_drug_logs.columns = ["GO_term", "probability"]
train_drug_logs = train_drug_logs.merge(real_go_info_svm, on="GO_term")

# Show the 30 most probable GO terms among layers <= 3.
(
    train_drug_logs[train_drug_logs["layer_number"] <= 3]
    .sort_values(by="probability", ascending=False)
    .head(30)
)
GO_term probability Name layer_number
423 GO:1902533 0.825532 Positive regulation of intracellular signal transduction (1) 2.0
99 GO:0001934 0.823688 Positive regulation of protein phosphorylation (1) 3.0
633 GO:0051301 0.817491 Cell division (1) 2.0
224 GO:0046777 0.780375 Protein autophosphorylation (1) 1.0
8 GO:0000165 0.778813 Mapk cascade (1) 3.0
624 GO:0010628 0.741437 Positive regulation of gene expression (1) 3.0
253 GO:0043552 0.735601 Positive regulation of phosphatidylinositol 3-kinase activity (1) 0.0
437 GO:0007169 0.646597 Transmembrane receptor protein tyrosine kinase signaling pathway (1) 3.0
653 GO:0072593 0.631785 Reactive oxygen species metabolic process (1) 3.0
894 GO:0048017 0.609803 Inositol lipid-mediated signaling (1) 1.0
578 GO:2000379 0.603184 Positive regulation of reactive oxygen species metabolic process (1) 1.0
24 GO:0007346 0.596523 Regulation of mitotic cell cycle (1) 3.0
573 GO:0010629 0.592396 Negative regulation of gene expression (1) 3.0
908 GO:0051899 0.591286 Membrane depolarization (1) 2.0
906 GO:0043549 0.586633 Regulation of kinase activity (1) 3.0
105 GO:0045860 0.585544 Positive regulation of protein kinase activity (1) 2.0
12 GO:0070374 0.584467 Positive regulation of erk1 and erk2 cascade (1) 0.0
74 GO:0001817 0.527795 Regulation of cytokine production (1) 3.0
44 GO:0048812 0.526118 Neuron projection morphogenesis (1) 3.0
853 GO:0038083 0.500000 Peptidyl-tyrosine autophosphorylation (1) 0.0
223 GO:0018108 0.493038 Peptidyl-tyrosine phosphorylation (1) 3.0
9 GO:0043408 0.472479 Regulation of mapk cascade (1) 2.0
570 GO:0045597 0.464440 Positive regulation of cell differentiation (1) 3.0
333 GO:1904019 0.457451 Epithelial cell apoptotic process (1) 1.0
702 GO:0048146 0.440487 Positive regulation of fibroblast proliferation (1) 0.0
80 GO:0010575 0.438910 Positive regulation of vascular endothelial growth factor production (1) 0.0
106 GO:0071900 0.437003 Regulation of protein serine/threonine kinase activity (1) 2.0
141 GO:0050900 0.435245 Leukocyte migration (1) 3.0
10 GO:0051403 0.429898 Stress-activated mapk cascade (1) 2.0
11 GO:0043406 0.426305 Positive regulation of map kinase activity (1) 1.0
# For known drugs: count how many of the 30 most probable GO terms
# (layers <= 3) also appear in column 1 of the selected drug's entry in
# compounds_GOterms_matches (presumably its annotated GO terms — TODO confirm).
len(
    set(
        train_drug_logs[train_drug_logs["layer_number"] <= 3]
        .sort_values(by="probability", ascending=False)
        .head(30)["GO_term"]
    )
    & set(pd.DataFrame(compounds_GOterms_matches[selected_drug_name])[1])
)
30
# Box plot of the predicted probability (y) grouped by the selected drug's
# value in slim_matrix_single_neuron (x), one observation per GO term.
# NOTE(review): `plot` is not defined in the visible cells and x/y are passed
# as vectors — confirm that `data=plot` is still needed.
drug_annotation = slim_matrix_single_neuron.loc[train_drug_logs["GO_term"], selected_drug_name]
go_probability = train_drug_logs.set_index("GO_term")["probability"]
ax = sns.boxplot(x=drug_annotation, y=go_probability, data=plot, showfliers=True)

# Delta-logit per GO term for the selected drug: posterior log-odds (from the
# predicted probability) minus prior log-odds. The prior is the row sum of
# slim_matrix_single_neuron divided by its number of columns (presumably the
# fraction of drugs annotated with the term — TODO confirm the matrix is 0/1).
sum_annotations = slim_matrix_single_neuron.T.sum()/slim_matrix_single_neuron.shape[1]
logits_apriori = np.log(sum_annotations/(1-sum_annotations))
logits_apost = np.log(train_drug_logs["probability"]/(1-train_drug_logs["probability"]))

# Look the prior up by GO term instead of subtracting by position:
# train_drug_logs is the result of a merge, so its row order is not guaranteed
# to match the row order of slim_matrix_single_neuron.
logits_apriori_aligned = logits_apriori.loc[train_drug_logs["GO_term"].to_numpy()]
delta_logits = logits_apost.to_numpy() - logits_apriori_aligned.to_numpy()

# Attach the result to the per-term table, row for row.
delta_logits_df = pd.DataFrame(delta_logits, index=train_drug_logs.index)
delta_logits_df.columns = ["delta_logits"]
train_drug_mod = train_drug_logs.merge(delta_logits_df, left_index=True, right_index=True)

# Show the 30 GO terms (layers <= 3) with the largest gain over the prior.
train_drug_mod.loc[train_drug_mod["layer_number"] <= 3].sort_values(by=["delta_logits"], ascending=False).head(30)
GO_term probability Name layer_number delta_logits
578 GO:2000379 0.603184 Positive regulation of reactive oxygen species metabolic process (1) 1.0 2.770126
253 GO:0043552 0.735601 Positive regulation of phosphatidylinositol 3-kinase activity (1) 0.0 2.707570
80 GO:0010575 0.438910 Positive regulation of vascular endothelial growth factor production (1) 0.0 2.282492
633 GO:0051301 0.817491 Cell division (1) 2.0 2.245231
224 GO:0046777 0.780375 Protein autophosphorylation (1) 1.0 2.053782
423 GO:1902533 0.825532 Positive regulation of intracellular signal transduction (1) 2.0 2.014408
458 GO:0035025 0.374269 Positive regulation of rho protein signal transduction (1) 0.0 1.952270
848 GO:0071670 0.345148 Smooth muscle cell chemotaxis (1) 0.0 1.887644
348 GO:0006939 0.412178 Smooth muscle contraction (1) 2.0 1.842256
350 GO:0045987 0.348868 Positive regulation of smooth muscle contraction (1) 1.0 1.783402
908 GO:0051899 0.591286 Membrane depolarization (1) 2.0 1.675960
653 GO:0072593 0.631785 Reactive oxygen species metabolic process (1) 3.0 1.650121
115 GO:0060312 0.275354 Regulation of blood vessel remodeling (1) 0.0 1.625762
926 GO:0060020 0.369360 Bergmann glial cell differentiation (1) 0.0 1.614859
923 GO:0048170 0.295742 Positive regulation of long-term neuronal synaptic plasticity (1) 0.0 1.598558
99 GO:0001934 0.823688 Positive regulation of protein phosphorylation (1) 3.0 1.593723
853 GO:0038083 0.500000 Peptidyl-tyrosine autophosphorylation (1) 0.0 1.588712
713 GO:0035726 0.210551 Common myeloid progenitor cell proliferation (1) 0.0 1.577983
445 GO:0048008 0.414654 Platelet-derived growth factor receptor signaling pathway (1) 1.0 1.552362
857 GO:0035584 0.298388 Calcium-mediated signaling using intracellular calcium source (1) 0.0 1.496390
333 GO:1904019 0.457451 Epithelial cell apoptotic process (1) 1.0 1.481165
933 GO:0051150 0.337253 Regulation of smooth muscle cell differentiation (1) 1.0 1.474264
352 GO:0014827 0.254529 Intestine smooth muscle contraction (1) 0.0 1.453480
814 GO:0090037 0.235572 Positive regulation of protein kinase c signaling (1) 0.0 1.416277
894 GO:0048017 0.609803 Inositol lipid-mediated signaling (1) 1.0 1.399588
506 GO:0007286 0.240113 Spermatid development (1) 1.0 1.376021
742 GO:0035733 0.239304 Hepatic stellate cell activation (1) 0.0 1.371582
10 GO:0051403 0.429898 Stress-activated mapk cascade (1) 2.0 1.369506
702 GO:0048146 0.440487 Positive regulation of fibroblast proliferation (1) 0.0 1.349525
782 GO:1902042 0.289337 Negative regulation of extrinsic apoptotic signaling pathway via death domain receptors (1) 0.0 1.347889
# Box plot of delta_logits (y) grouped by the selected drug's value in
# slim_matrix_single_neuron (x), one observation per GO term.
# NOTE(review): `plot` is not defined in the visible cells and x/y are passed
# as vectors — confirm that `data=plot` is still needed.
drug_annotation = slim_matrix_single_neuron.loc[train_drug_mod["GO_term"], selected_drug_name]
go_delta_logits = train_drug_mod.set_index("GO_term")["delta_logits"]
ax = sns.boxplot(x=drug_annotation, y=go_delta_logits, data=plot, showfliers=True)

SVM GO TERM 2D representation

from sklearn.manifold import TSNE
import plotly.express as px

Choose go to study…

# Let the user pick a GO term and read the selection back from the widget.
display(combobox_go)
selected_goterm = combobox_go.result

# Show the metadata row of the term's "_1" entry in real_go_info.
real_go_info.loc[real_go_info["GO_term"] == selected_goterm + "_1"]
GO_term Name layer_number
4338 GO:0071353_1 Cellular response to interleukin-4 (1) 1.0
# Rows of attribution_data_annotated to use for this GO term:
# "<GO term>_1" ... "<GO term>_6".
list_nodes = [selected_goterm + "_" + str(i) for i in range(1, 7)]

# Select those rows and transpose, so each of them becomes a column.
score = attribution_data_annotated.loc[list_nodes].T
# Scale every column by its standard deviation; NaNs produced by the division
# are replaced with 0.
score_mod = (score / score.std()).fillna(0)

# Row of slim_matrix_single_neuron for the GO term, and the SVM predictions
# for the same scores.
annotations = slim_matrix_single_neuron.loc[selected_goterm]
y_predicted = models_svm[selected_goterm].predict(score_mod.astype(float))

Plot SVM

View statistics of GOterm

“Perfect” model (with train data)

# Metrics of the selected GO term's SVM on score_mod, compared with `annotations`.
# The AUC is computed from the continuous decision values, the other metrics
# from the hard predictions in y_predicted.
svm_scores = models_svm[selected_goterm].decision_function(score_mod.astype(float))
auc = metrics.roc_auc_score(annotations, svm_scores)
cnf_matrix = metrics.confusion_matrix(annotations, y_predicted)
print(cnf_matrix)

for metric_label, metric_fn in (
    ("Accuracy:", metrics.accuracy_score),
    ("Precision:", metrics.precision_score),  # TP / (TP+FP)
    ("Recall:", metrics.recall_score),  # TP / (TP+FN)
):
    print(metric_label, metric_fn(annotations, y_predicted))
print("AUC with score:", auc)
[[206   6]
 [  2  16]]
Accuracy: 0.9652173913043478
Precision: 0.7272727272727273
Recall: 0.8888888888888888
AUC with score: 0.9855870020964361

TN - FP

FN - TP

In my opinion precision is what matters most here: I would rather have fewer false positives (FP), right?

Test statistics…

# Same metrics computed from the stored matrices: the GO term's row of
# slim_matrix_single_neuron is the reference, preds_svm_matrix holds the hard
# predictions and platt_matrix the scores used for the AUC.
go_truth = slim_matrix_single_neuron.loc[selected_goterm]
go_predictions = preds_svm_matrix.loc[selected_goterm]

auc = metrics.roc_auc_score(go_truth, platt_matrix.loc[selected_goterm])
cnf_matrix = metrics.confusion_matrix(go_truth, go_predictions)
print(cnf_matrix)

for metric_label, metric_fn in (
    ("Accuracy:", metrics.accuracy_score),
    ("Precision:", metrics.precision_score),  # TP / (TP+FP)
    ("Recall:", metrics.recall_score),  # TP / (TP+FN)
):
    print(metric_label, metric_fn(go_truth, go_predictions))
print("AUC with score:", auc)
[[203   9]
 [  4  14]]
Accuracy: 0.9434782608695652
Precision: 0.6086956521739131
Recall: 0.7777777777777778
AUC with score: 0.9095911949685536
import colorlover as cl

# Confusion matrix of y_predicted against annotations; ravel() on the 2x2
# matrix is unpacked as TN, FP, FN, TP.
matrix = metrics.confusion_matrix(annotations, y_predicted)
tn, fp, fn, tp = matrix.ravel()

# Slices are drawn in the order TP, FN, FP, TN (sort=False keeps that order).
label_text = ["True Positive", "False Negative", "False Positive", "True Negative"]
labels = ["<b>TP</b>", "<b>FN</b>", "<b>FP</b>", "<b>TN</b>"]
values = [tp, fn, fp, tn]

# NOTE(review): blue/red are not used in this cell — confirm whether any other
# cell still needs them.
blue = cl.flipper()["seq"]["9"]["Blues"]
red = cl.flipper()["seq"]["9"]["Reds"]
colors = ["#ff3700", "#FFA0A0", "#CCE9FF", "#0b8bff"]

trace0 = go.Pie(
    values=values,
    labels=label_text,
    text=labels,
    textinfo="text+value",
    hoverinfo="label+value+percent",
    sort=False,
    rotation=90,
    marker={"colors": colors},
    insidetextfont={"color": "#36382E"},
)

layout = go.Layout(
    title={
        "text": "Confusion Matrix",
        "x": 0.3,
        "y": 0.8,
        "font": {"size": 14},
        "xanchor": 'center',
        "yanchor": 'top',
    },
    # Optional tweaks kept for reference:
    #   margin=dict(l=50, r=50, t=100, b=10)
    #   plot_bgcolor="#282b38", paper_bgcolor="#282b38"
    legend={"font": {"color": "#36382E"}, "orientation": "h", "x": 0.1, "y": -0.03},
    font={"family": 'Roboto', "color": "#36382E"},
)

data = [trace0]
figure = go.Figure(data=data, layout=layout)
figure
# ROC curve of the selected GO term's SVM, evaluated on score_mod against
# `annotations`.
y_test = annotations
# Rank the samples by the continuous SVM decision value, not by the hard 0/1
# predictions: thresholded labels give a single operating point, so the curve
# degenerates to three points and its AUC no longer matches the AUC reported
# from decision_function in the statistics cell.
decision_test = models_svm[selected_goterm].decision_function(score_mod.astype(float))
fpr, tpr, threshold = metrics.roc_curve(y_test, decision_test)

# AUC Score
auc_score = metrics.roc_auc_score(y_true=y_test, y_score=decision_test)

trace0 = go.Scatter(
    x=fpr, y=tpr, mode="lines", name="Test Data", marker={"color": "#ff3700"}
)

layout = go.Layout(
    title=dict(text=f"ROC Curve (AUC = {auc_score:.3f})",
            x=0.6,
            y=0.5,
            font=dict(size=20)
              ),
    xaxis=dict(title="False Positive Rate", gridcolor="white"),
    yaxis=dict(title="True Positive Rate", gridcolor="white"),
    legend=dict(x=0, y=1.05, orientation="h"),
    margin=dict(l=100, r=10, t=25, b=40),
#    plot_bgcolor="#282b38",
#    paper_bgcolor="#282b38",
    font=dict(family='Roboto',color= "#36382E"),
)

data = [trace0]
figure = go.Figure(data=data, layout=layout)
figure

Plot SVM with unknown labels

Voronoi Tessellation

What is a Voronoi tessellation? Given a set $P = \{p_1, \dots, p_n\}$ of sites, a Voronoi tessellation is a subdivision of the space into $n$ cells, one for each site in $P$, with the property that a point $q$ lies in the cell corresponding to site $p_i$ if and only if $d(p_i, q) < d(p_j, q)$ for every $j \neq i$. The boundaries between cells consist of all points in the plane that are equidistant from their two nearest sites. Voronoi tessellations have applications in computer science.

https://stackoverflow.com/questions/61225052/svm-plot-decision-surface-when-working-with-more-than-2-features

# 2-D t-SNE embedding of the normalized scores (PCA initialization,
# perplexity 30, fixed random_state).
tsne = TSNE(n_components=2, init="pca", perplexity=30, random_state=123, verbose=0)
z = tsne.fit_transform(score_mod.astype(float))
C:\Users\ksada\Anaconda3\envs\SparseGO\lib\site-packages\sklearn\manifold\_t_sne.py:790: FutureWarning:

The default learning rate in TSNE will change from 200.0 to 'auto' in 1.2.

C:\Users\ksada\Anaconda3\envs\SparseGO\lib\site-packages\sklearn\manifold\_t_sne.py:982: FutureWarning:

The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.
# Feature names the SVM was fitted with: those are the attribution rows we need.
list_nodes = list(models_svm[selected_goterm].feature_names_in_)

# Attributions of the `unknown` columns for those rows, transposed.
score_unknown = attribution_data_all.loc[list_nodes, unknown].T
# Normalize with the standard deviations of `score`, not with their own.
score_unknown_mod = (score_unknown / score.std()).fillna(0)

# Label every one of these rows with 2 (= unknown MoA) and predict with the SVM.
y_unknown = np.full(len(score_unknown_mod), 2)
y_pred_unknown = models_svm[selected_goterm].predict(score_unknown_mod.astype(float))

# Stack score_mod/annotations on top of the unknown scores/labels.
all_score = pd.concat([score_mod, score_unknown_mod])
all_y = np.concatenate((annotations, y_unknown))  # 2=unknown MOA

Plot T-SNE SVM

# Import from the public package path: `sklearn.neighbors._classification` is a
# private module whose location is not guaranteed across scikit-learn releases.
from sklearn.neighbors import KNeighborsClassifier

# Reference implementation:
# https://github.com/plotly/dash-sample-apps/blob/main/apps/dash-svm/utils/dash_reusable_components.py

# Re-run t-SNE on the known and unknown rows together so both share one 2-D space.
z = tsne.fit_transform(all_score.astype(float))

# Plotting table: label, 2-D coordinates and row name for every entry of `all_score`.
df = pd.DataFrame()
df["y"] = all_y
df["comp-1"] = z[:, 0]
df["comp-2"] = z[:, 1]
df["name"] = list(all_score.index)
# Sort by label while it is still numeric, then store the label as a string.
df = df.sort_values(by=['y'])
df["y"] = df["y"].astype(str)

# SVM prediction in the original feature space for every row.
X, y = all_score.astype(float), all_y
y_predicted = models_svm[selected_goterm].predict(X)

# Background grid covering the embedding with a margin of 1 on every side.
resolution = 300  # the background is resolution x resolution (300 x 300) pixels
X2d_xmin, X2d_xmax = np.min(z[:, 0]) - 1, np.max(z[:, 0]) + 1
X2d_ymin, X2d_ymax = np.min(z[:, 1]) - 1, np.max(z[:, 1]) + 1
xx, yy = np.meshgrid(
    np.linspace(X2d_xmin, X2d_xmax, resolution),
    np.linspace(X2d_ymin, X2d_ymax, resolution),
)

# Approximate Voronoi tessellation on the resolution x resolution grid using 1-NN:
# each grid point takes the SVM prediction of its nearest embedded sample.
background_model = KNeighborsClassifier(n_neighbors=1).fit(z, y_predicted)
voronoiBackground = background_model.predict(np.c_[xx.ravel(), yy.ravel()])
voronoiBackground = voronoiBackground.reshape((resolution, resolution))
C:\Users\ksada\Anaconda3\envs\SparseGO\lib\site-packages\sklearn\manifold\_t_sne.py:790: FutureWarning:

The default learning rate in TSNE will change from 200.0 to 'auto' in 1.2.

C:\Users\ksada\Anaconda3\envs\SparseGO\lib\site-packages\sklearn\manifold\_t_sne.py:982: FutureWarning:

The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.

C:\Users\ksada\Anaconda3\envs\SparseGO\lib\site-packages\sklearn\neighbors\_classification.py:228: FutureWarning:

Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.
# Display name of the selected GO term: find the row of `real_go_info` whose
# "GO_term" equals "<selected_goterm>_1", take the first match's "Name" and drop
# its last four characters.
go_name = real_go_info.loc[
    real_go_info["GO_term"] == selected_goterm + "_1", "Name"
].iloc[0][:-4]
go_name
'Cellular response to interleukin-4'
# Marker colours by class label (positions are relative to the 0..2 label range):
# 0 -> blue (not annotated), 1 -> red (annotated), 2 -> dark (unknown MOA).
bright_cscale = [[0, "#0b8bff"], [0.5, "#ff3700"], [1, "#36382E"]]
# Pale two-colour scale for the background regions.
new_cscale = [[0, "#CCE9FF"], [1, "#FFA0A0"]]

# Filled background: the 1-NN (Voronoi) approximation of the SVM prediction.
trace0 = go.Contour(
    x=xx.flatten(),
    y=yy.flatten(),
    z=voronoiBackground.flatten(),
    hoverinfo="none",
    showscale=False,
    contours=dict(showlines=False),
    colorscale=new_cscale,
    opacity=0.9,
)

# Same surface drawn again with red contour lines.
trace1 = go.Contour(
    x=xx.flatten(),
    y=yy.flatten(),
    z=voronoiBackground.flatten(),
    showscale=False,
    hoverinfo="none",
    colorscale=new_cscale,
    line=dict(color="#ff3700"),
)

# One marker per embedded row, coloured by its label.
trace2 = go.Scatter(
    x=df["comp-1"],
    y=df["comp-2"],
    mode="markers",
    text=df["name"].to_numpy(),
    marker=dict(
        size=7,
        color=df["y"].to_numpy(int),
        colorscale=bright_cscale,
        # Fix the colour range to the full label range 0..2. Without it plotly
        # rescales to the labels actually present, so a missing class would shift
        # the colours away from the hard-coded legend entries below.
        cmin=0,
        cmax=2,
    ),
    showlegend=False,
)

# Legend-only traces (no data points): trace2 is a single colour-mapped trace, so
# the per-class legend entries are supplied by these empty scatters.
legend1 = go.Scatter(
    x=[None],
    y=[None],
    mode="markers",
    name="Not annotated to<br>"+selected_goterm,
    marker=dict(size=7, color="#0b8bff", symbol='circle'),
)

legend2 = go.Scatter(
    x=[None],
    y=[None],
    mode="markers",
    name="Drug annotated to<br>"+selected_goterm,
    marker=dict(size=7, color="#ff3700", symbol='circle'),
)

legend3 = go.Scatter(
    x=[None],
    y=[None],
    mode="markers",
    name="Unknown MOA<br>annotations",
    marker=dict(size=7, color="#36382E", symbol='circle'),
)

layout = go.Layout(
    title=dict(
        text="<b>"+selected_goterm+"</b> "+go_name,
        x=0.5,
        y=0.92,
        font=dict(size=18),
        xanchor='center',
        yanchor='top',
    ),
    # t-SNE axes carry no interpretable units, so ticks and grid are hidden.
    xaxis=dict(ticks="", showticklabels=False, showgrid=False, zeroline=False),
    yaxis=dict(ticks="", showticklabels=False, showgrid=False, zeroline=False),
    # Clamp the axes to the background grid. Use the vectorised ndarray reductions
    # instead of the builtin min/max, which iterate the flattened grid in Python.
    yaxis_range=[float(yy.min()), float(yy.max())],
    xaxis_range=[float(xx.min()), float(xx.max())],
    legend=dict(x=0, y=0, orientation="h", font=dict(size=14)),
    paper_bgcolor='rgba(0,0,0,0)',
    width=600, height=800,
    font=dict(family='Roboto', color="#36382E", size=15),
)

# Draw order: background fill, contour lines, markers, then the legend-only traces.
data = [trace0, trace1, trace2, legend2, legend1, legend3]
figure = go.Figure(data=data, layout=layout)

# Bare expression so the notebook renders the figure as the cell output.
figure